Diffstat (limited to 'contrib/llvm/lib')
801 files changed, 62166 insertions, 34290 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index c189a00..bd132c0 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -237,6 +237,19 @@ AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { VI->getMetadata(LLVMContext::MD_tbaa)); } +AliasAnalysis::Location +AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { + return Location(CXI->getPointerOperand(), + getTypeStoreSize(CXI->getCompareOperand()->getType()), + CXI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location +AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { + return Location(RMWI->getPointerOperand(), + getTypeStoreSize(RMWI->getValOperand()->getType()), + RMWI->getMetadata(LLVMContext::MD_tbaa)); +} AliasAnalysis::Location AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { @@ -268,8 +281,8 @@ AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { - // Be conservative in the face of volatile. - if (L->isVolatile()) + // Be conservative in the face of volatile/atomic. + if (!L->isUnordered()) return ModRef; // If the load address doesn't alias the given address, it doesn't read @@ -283,8 +296,8 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { - // Be conservative in the face of volatile. - if (S->isVolatile()) + // Be conservative in the face of volatile/atomic. + if (!S->isUnordered()) return ModRef; // If the store address cannot alias the pointer in question, then the @@ -317,6 +330,33 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { return ModRef; } +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { + // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. + if (CX->getOrdering() > Monotonic) + return ModRef; + + // If the cmpxchg address does not alias the location, it does not access it. + if (!alias(getLocation(CX), Loc)) + return NoModRef; + + return ModRef; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { + // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. + if (RMW->getOrdering() > Monotonic) + return ModRef; + + // If the atomicrmw address does not alias the location, it does not access it. + if (!alias(getLocation(RMW), Loc)) + return NoModRef; + + return ModRef; +} + + // AliasAnalysis destructor: DO NOT move this to the header file for // AliasAnalysis or else clients of the AliasAnalysis class may not depend on // the AliasAnalysis.o file in the current .a file, causing alias analysis @@ -341,7 +381,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { /// getTypeStoreSize - Return the TargetData store size for the given type, /// if known, or a conservative value otherwise. /// -uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) { +uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) { return TD ? 
TD->getTypeStoreSize(Ty) : UnknownSize; } diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 1afc1b7..37271b9 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -171,12 +171,12 @@ bool AAEval::runOnFunction(Function &F) { for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { uint64_t I1Size = AliasAnalysis::UnknownSize; - const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); + Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { uint64_t I2Size = AliasAnalysis::UnknownSize; - const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); + Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); switch (AA.alias(*I1, I1Size, *I2, I2Size)) { @@ -207,7 +207,7 @@ bool AAEval::runOnFunction(Function &F) { for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); V != Ve; ++V) { uint64_t Size = AliasAnalysis::UnknownSize; - const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); + Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(*C, *V, Size)) { diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 2ed6949..3fcd3b5 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -56,12 +56,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AliasTy = MayAlias; } - if (CallSites.empty()) { // Merge call sites... - if (!AS.CallSites.empty()) - std::swap(CallSites, AS.CallSites); - } else if (!AS.CallSites.empty()) { - CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end()); - AS.CallSites.clear(); + if (UnknownInsts.empty()) { // Merge call sites... + if (!AS.UnknownInsts.empty()) + std::swap(UnknownInsts, AS.UnknownInsts); + } else if (!AS.UnknownInsts.empty()) { + UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end()); + AS.UnknownInsts.clear(); } AS.Forward = this; // Forward across AS now... @@ -123,13 +123,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, addRef(); // Entry points to alias set. } -void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { - CallSites.push_back(CS.getInstruction()); +void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { + UnknownInsts.push_back(I); - AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); - if (Behavior == AliasAnalysis::DoesNotAccessMemory) - return; - if (AliasAnalysis::onlyReadsMemory(Behavior)) { + if (!I->mayWriteToMemory()) { AliasTy = MayAlias; AccessTy |= Refs; return; @@ -147,7 +144,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo, AliasAnalysis &AA) const { if (AliasTy == MustAlias) { - assert(CallSites.empty() && "Illegal must alias set!"); + assert(UnknownInsts.empty() && "Illegal must alias set!"); // If this is a set of MustAliases, only check to see if the pointer aliases // SOME value in the set. 
@@ -167,10 +164,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, I.getTBAAInfo()))) return true; - // Check the call sites list and invoke list... - if (!CallSites.empty()) { - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) - if (AA.getModRefInfo(CallSites[i], + // Check the unknown instructions... + if (!UnknownInsts.empty()) { + for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) + if (AA.getModRefInfo(UnknownInsts[i], AliasAnalysis::Location(Ptr, Size, TBAAInfo)) != AliasAnalysis::NoModRef) return true; @@ -179,18 +176,20 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, return false; } -bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { - if (AA.doesNotAccessMemory(CS)) +bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { + if (!Inst->mayReadOrWriteMemory()) return false; - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { - if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef || - AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef) + for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { + CallSite C1 = getUnknownInst(i), C2 = Inst; + if (!C1 || !C2 || + AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || + AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) return true; } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != + if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) != AliasAnalysis::NoModRef) return true; @@ -244,10 +243,10 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, -AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { +AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { AliasSet *FoundSet = 0; for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->Forward || !I->aliasesCallSite(CS, AA)) + if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) continue; if (FoundSet == 0) // If this is the first alias set ptr can go into. @@ -296,22 +295,28 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { bool AliasSetTracker::add(LoadInst *LI) { + if (LI->getOrdering() > Monotonic) return addUnknown(LI); + AliasSet::AccessType ATy = AliasSet::Refs; + if (!LI->isUnordered()) ATy = AliasSet::ModRef; bool NewPtr; AliasSet &AS = addPointer(LI->getOperand(0), AA.getTypeStoreSize(LI->getType()), LI->getMetadata(LLVMContext::MD_tbaa), - AliasSet::Refs, NewPtr); + ATy, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; } bool AliasSetTracker::add(StoreInst *SI) { + if (SI->getOrdering() > Monotonic) return addUnknown(SI); + AliasSet::AccessType ATy = AliasSet::Mods; + if (!SI->isUnordered()) ATy = AliasSet::ModRef; bool NewPtr; Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), AA.getTypeStoreSize(Val->getType()), SI->getMetadata(LLVMContext::MD_tbaa), - AliasSet::Mods, NewPtr); + ATy, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; } @@ -325,20 +330,20 @@ bool AliasSetTracker::add(VAArgInst *VAAI) { } -bool AliasSetTracker::add(CallSite CS) { - if (isa<DbgInfoIntrinsic>(CS.getInstruction())) +bool AliasSetTracker::addUnknown(Instruction *Inst) { + if (isa<DbgInfoIntrinsic>(Inst)) return true; // Ignore DbgInfo Intrinsics. 
- if (AA.doesNotAccessMemory(CS)) + if (!Inst->mayReadOrWriteMemory()) return true; // doesn't alias anything - AliasSet *AS = findAliasSetForCallSite(CS); + AliasSet *AS = findAliasSetForUnknownInst(Inst); if (AS) { - AS->addCallSite(CS, AA); + AS->addUnknownInst(Inst, AA); return false; } AliasSets.push_back(new AliasSet()); AS = &AliasSets.back(); - AS->addCallSite(CS, AA); + AS->addUnknownInst(Inst, AA); return true; } @@ -348,13 +353,9 @@ bool AliasSetTracker::add(Instruction *I) { return add(LI); if (StoreInst *SI = dyn_cast<StoreInst>(I)) return add(SI); - if (CallInst *CI = dyn_cast<CallInst>(I)) - return add(CI); - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - return add(II); if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) return add(VAAI); - return true; + return addUnknown(I); } void AliasSetTracker::add(BasicBlock &BB) { @@ -375,8 +376,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { AliasSet &AS = const_cast<AliasSet&>(*I); // If there are any call sites in the alias set, add them to this AST. - for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) - add(AS.CallSites[i]); + for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i) + add(AS.UnknownInsts[i]); // Loop over all of the pointers in this alias set. bool X; @@ -393,7 +394,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { /// tracker. void AliasSetTracker::remove(AliasSet &AS) { // Drop all call sites. - AS.CallSites.clear(); + AS.UnknownInsts.clear(); // Clear the alias set. unsigned NumRefs = 0; @@ -453,11 +454,11 @@ bool AliasSetTracker::remove(VAArgInst *VAAI) { return true; } -bool AliasSetTracker::remove(CallSite CS) { - if (AA.doesNotAccessMemory(CS)) +bool AliasSetTracker::removeUnknown(Instruction *I) { + if (!I->mayReadOrWriteMemory()) return false; // doesn't alias anything - AliasSet *AS = findAliasSetForCallSite(CS); + AliasSet *AS = findAliasSetForUnknownInst(I); if (!AS) return false; remove(*AS); return true; @@ -469,11 +470,9 @@ bool AliasSetTracker::remove(Instruction *I) { return remove(LI); if (StoreInst *SI = dyn_cast<StoreInst>(I)) return remove(SI); - if (CallInst *CI = dyn_cast<CallInst>(I)) - return remove(CI); if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) return remove(VAAI); - return true; + return removeUnknown(I); } @@ -488,13 +487,13 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { // If this is a call instruction, remove the callsite from the appropriate // AliasSet (if present). - if (CallSite CS = PtrVal) { - if (!AA.doesNotAccessMemory(CS)) { + if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) { + if (Inst->mayReadOrWriteMemory()) { // Scan all the alias sets to see if this call site is contained. 
for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward) continue; - I->removeCallSite(CS); + I->removeUnknownInst(Inst); } } } @@ -571,11 +570,11 @@ void AliasSet::print(raw_ostream &OS) const { OS << ", " << I.getSize() << ")"; } } - if (!CallSites.empty()) { - OS << "\n " << CallSites.size() << " Call Sites: "; - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (!UnknownInsts.empty()) { + OS << "\n " << UnknownInsts.size() << " Unknown instructions: "; + for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (i) OS << ", "; - WriteAsOperand(OS, CallSites[i]); + WriteAsOperand(OS, UnknownInsts[i]); } } OS << "\n"; diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 71e0a83..0ba6af9 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm-c/Analysis.h" +#include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/Analysis/Verifier.h" #include <cstring> @@ -23,7 +24,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeAliasSetPrinterPass(Registry); initializeNoAAPass(Registry); initializeBasicAliasAnalysisPass(Registry); - initializeBlockFrequencyPass(Registry); + initializeBlockFrequencyInfoPass(Registry); initializeBranchProbabilityInfoPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 8330ea7..af400ba 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" @@ -100,7 +101,7 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const TargetData &TD) { - const Type *AccessTy; + Type *AccessTy; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { if (!GV->hasDefinitiveInitializer()) return AliasAnalysis::UnknownSize; @@ -317,7 +318,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, E = GEPOp->op_end(); I != E; ++I) { Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. - if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); if (FieldNo == 0) continue; @@ -374,7 +375,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, } if (Scale) { - VariableGEPIndex Entry = {Index, Extension, Scale}; + VariableGEPIndex Entry = {Index, Extension, + static_cast<int64_t>(Scale)}; VarIndices.push_back(Entry); } } @@ -467,6 +469,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); } virtual AliasResult alias(const Location &LocA, @@ -549,10 +552,15 @@ namespace { // Register this pass... 
char BasicAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa", +INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa", + "Basic Alias Analysis (stateless AA impl)", + false, true, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa", "Basic Alias Analysis (stateless AA impl)", false, true, false) + ImmutablePass *llvm::createBasicAliasAnalysisPass() { return new BasicAliasAnalysis(); } @@ -706,7 +714,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (!isNoAlias(Location(cast<Value>(CI)), Loc)) { + if (!isNoAlias(Location(*CI), Location(Object))) { PassedAsArg = true; break; } @@ -716,6 +724,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; } + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>(); ModRefResult Min = ModRef; // Finally, handle specific knowledge of intrinsics. @@ -754,26 +763,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // We know that memset doesn't load anything. Min = Mod; break; - case Intrinsic::atomic_cmp_swap: - case Intrinsic::atomic_swap: - case Intrinsic::atomic_load_add: - case Intrinsic::atomic_load_sub: - case Intrinsic::atomic_load_and: - case Intrinsic::atomic_load_nand: - case Intrinsic::atomic_load_or: - case Intrinsic::atomic_load_xor: - case Intrinsic::atomic_load_max: - case Intrinsic::atomic_load_min: - case Intrinsic::atomic_load_umax: - case Intrinsic::atomic_load_umin: - if (TD) { - Value *Op1 = II->getArgOperand(0); - uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType()); - MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa); - if (isNoAlias(Location(Op1, Op1Size, Tag), Loc)) - return NoModRef; - } - break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: { @@ -818,6 +807,39 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, } } + // We can bound the aliasing properties of memset_pattern16 just as we can + // for memcpy/memset. This is particularly important because the + // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 + // whenever possible. + else if (TLI.has(LibFunc::memset_pattern16) && + CS.getCalledFunction() && + CS.getCalledFunction()->getName() == "memset_pattern16") { + const Function *MS = CS.getCalledFunction(); + FunctionType *MemsetType = MS->getFunctionType(); + if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 && + isa<PointerType>(MemsetType->getParamType(0)) && + isa<PointerType>(MemsetType->getParamType(1)) && + isa<IntegerType>(MemsetType->getParamType(2))) { + uint64_t Len = UnknownSize; + if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2))) + Len = LenCI->getZExtValue(); + const Value *Dest = CS.getArgument(0); + const Value *Src = CS.getArgument(1); + // If it can't overlap the source dest, then it doesn't modref the loc. + if (isNoAlias(Location(Dest, Len), Loc)) { + // Always reads 16 bytes of the source. + if (isNoAlias(Location(Src, 16), Loc)) + return NoModRef; + // If it can't overlap the dest, then worst case it reads the loc. + Min = Ref; + // Always reads 16 bytes of the source. + } else if (isNoAlias(Location(Src, 16), Loc)) { + // If it can't overlap the source, then worst case it mutates the loc. 
+ Min = Mod; + } + } + } + // The AliasAnalysis base class has some smarts, lets use them. return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } @@ -913,43 +935,43 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; - // If there is a difference between the pointers, but the difference is - // less than the size of the associated memory object, then we know - // that the objects are partially overlapping. + // If there is a constant difference between the pointers, but the difference + // is less than the size of the associated memory object, then we know + // that the objects are partially overlapping. If the difference is + // greater, we know they do not overlap. if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { - if (GEP1BaseOffset >= 0 ? - (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) : - (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size && - GEP1BaseOffset != INT64_MIN)) - return PartialAlias; + if (GEP1BaseOffset >= 0) { + if (V2Size != UnknownSize) { + if ((uint64_t)GEP1BaseOffset < V2Size) + return PartialAlias; + return NoAlias; + } + } else { + if (V1Size != UnknownSize) { + if (-(uint64_t)GEP1BaseOffset < V1Size) + return PartialAlias; + return NoAlias; + } + } } - // If we have a known constant offset, see if this offset is larger than the - // access size being queried. If so, and if no variable indices can remove - // pieces of this constant, then we know we have a no-alias. For example, - // &A[100] != &A. - - // In order to handle cases like &A[100][i] where i is an out of range - // subscript, we have to ignore all constant offset pieces that are a multiple - // of a scaled index. Do this by removing constant offsets that are a - // multiple of any of our variable indices. This allows us to transform - // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1 - // provides an offset of 4 bytes (assuming a <= 4 byte access). - for (unsigned i = 0, e = GEP1VariableIndices.size(); - i != e && GEP1BaseOffset;++i) - if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale) - GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale; - - // If our known offset is bigger than the access size, we know we don't have - // an alias. - if (GEP1BaseOffset) { - if (GEP1BaseOffset >= 0 ? - (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) : - (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size && - GEP1BaseOffset != INT64_MIN)) + // Try to distinguish something like &A[i][1] against &A[42][0]. + // Grab the least significant bit set in any of the scales. + if (!GEP1VariableIndices.empty()) { + uint64_t Modulo = 0; + for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) + Modulo |= (uint64_t)GEP1VariableIndices[i].Scale; + Modulo = Modulo ^ (Modulo & (Modulo - 1)); + + // We can compute the difference between the two addresses + // mod Modulo. Check whether that difference guarantees that the + // two locations do not alias. + uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1); + if (V1Size != UnknownSize && V2Size != UnknownSize && + ModOffset >= V2Size && V1Size <= Modulo - ModOffset) return NoAlias; } - + // Statically, we can see that the base objects are the same, but the // pointers have dynamic offsets which we can't resolve. And none of our // little tricks above worked. 
diff --git a/contrib/llvm/lib/Analysis/BlockFrequency.cpp b/contrib/llvm/lib/Analysis/BlockFrequency.cpp deleted file mode 100644 index 4b86d1d..0000000 --- a/contrib/llvm/lib/Analysis/BlockFrequency.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Loops should be simplified before this analysis. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" -#include "llvm/Analysis/BlockFrequency.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" - -using namespace llvm; - -INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis", - true, true) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) -INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis", - true, true) - -char BlockFrequency::ID = 0; - - -BlockFrequency::BlockFrequency() : FunctionPass(ID) { - initializeBlockFrequencyPass(*PassRegistry::getPassRegistry()); - BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); -} - -BlockFrequency::~BlockFrequency() { - delete BFI; -} - -void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BranchProbabilityInfo>(); - AU.setPreservesAll(); -} - -bool BlockFrequency::runOnFunction(Function &F) { - BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); - BFI->doFunction(&F, &BPI); - return false; -} - -/// getblockFreq - Return block frequency. Never return 0, value must be -/// positive. Please note that initial frequency is equal to 1024. It means that -/// we should not rely on the value itself, but only on the comparison to the -/// other block frequencies. We do this to avoid using of floating points. -/// -uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) { - return BFI->getBlockFreq(BB); -} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp new file mode 100644 index 0000000..d16665f --- /dev/null +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -0,0 +1,63 @@ +//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", + true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", + true, true) + +char BlockFrequencyInfo::ID = 0; + + +BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { + initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); + BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); +} + +BlockFrequencyInfo::~BlockFrequencyInfo() { + delete BFI; +} + +void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BranchProbabilityInfo>(); + AU.setPreservesAll(); +} + +bool BlockFrequencyInfo::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); + BFI->doFunction(&F, &BPI); + return false; +} + +void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { + if (BFI) BFI->print(O); +} + +/// getblockFreq - Return block frequency. Return 0 if we don't have the +/// information. Please note that initial frequency is equal to 1024. It means +/// that we should not rely on the value itself, but only on the comparison to +/// the other block frequencies. We do this to avoid using of floating points. +/// +BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const { + return BFI->getBlockFreq(BB); +} diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index e39cd22..bde3b76 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,7 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Support/Debug.h" @@ -33,7 +36,7 @@ namespace { // private methods are hidden in the .cpp file. class BranchProbabilityAnalysis { - typedef std::pair<BasicBlock *, BasicBlock *> Edge; + typedef std::pair<const BasicBlock *, const BasicBlock *> Edge; DenseMap<Edge, uint32_t> *Weights; @@ -52,7 +55,7 @@ class BranchProbabilityAnalysis { // V // BB1<-+ // | | - // | | (Weight = 128) + // | | (Weight = 124) // V | // BB2--+ // | @@ -60,12 +63,21 @@ class BranchProbabilityAnalysis { // V // BB3 // - // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696.. - // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303.. 
+ // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 + // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 - static const uint32_t LBH_TAKEN_WEIGHT = 128; + static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + static const uint32_t RH_TAKEN_WEIGHT = 24; + static const uint32_t RH_NONTAKEN_WEIGHT = 8; + + static const uint32_t PH_TAKEN_WEIGHT = 20; + static const uint32_t PH_NONTAKEN_WEIGHT = 12; + + static const uint32_t ZH_TAKEN_WEIGHT = 20; + static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + // Standard weight value. Used when none of the heuristics set weight for // the edge. static const uint32_t NORMAL_WEIGHT = 16; @@ -100,29 +112,6 @@ class BranchProbabilityAnalysis { return false; } - // Multiply Edge Weight by two. - void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { - uint32_t Weight = BP->getEdgeWeight(Src, Dst); - uint32_t MaxWeight = getMaxWeightFor(Src); - - if (Weight * 2 > MaxWeight) - BP->setEdgeWeight(Src, Dst, MaxWeight); - else - BP->setEdgeWeight(Src, Dst, Weight * 2); - } - - // Divide Edge Weight by two. - void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { - uint32_t Weight = BP->getEdgeWeight(Src, Dst); - - assert(Weight > 0); - if (Weight / 2 < MIN_WEIGHT) - BP->setEdgeWeight(Src, Dst, MIN_WEIGHT); - else - BP->setEdgeWeight(Src, Dst, Weight / 2); - } - - uint32_t getMaxWeightFor(BasicBlock *BB) const { return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); } @@ -133,49 +122,119 @@ public: : Weights(W), BP(BP), LI(LI) { } + // Metadata Weights + bool calcMetadataWeights(BasicBlock *BB); + // Return Heuristics - void calcReturnHeuristics(BasicBlock *BB); + bool calcReturnHeuristics(BasicBlock *BB); // Pointer Heuristics - void calcPointerHeuristics(BasicBlock *BB); + bool calcPointerHeuristics(BasicBlock *BB); // Loop Branch Heuristics - void calcLoopBranchHeuristics(BasicBlock *BB); + bool calcLoopBranchHeuristics(BasicBlock *BB); + + // Zero Heurestics + bool calcZeroHeuristics(BasicBlock *BB); bool runOnFunction(Function &F); }; } // end anonymous namespace +// Propagate existing explicit probabilities from either profile data or +// 'expect' intrinsic processing. +bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + return false; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + return false; + + MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); + if (!WeightsNode) + return false; + + // Ensure there are weights for all of the successors. Note that the first + // operand to the metadata node is a name, not a weight. + if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) + return false; + + // Build up the final weights that will be used in a temporary buffer, but + // don't add them until all weihts are present. Each weight value is clamped + // to [1, getMaxWeightFor(BB)]. 
+ uint32_t WeightLimit = getMaxWeightFor(BB); + SmallVector<uint32_t, 2> Weights; + Weights.reserve(TI->getNumSuccessors()); + for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { + ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i)); + if (!Weight) + return false; + Weights.push_back( + std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit))); + } + assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + + return true; +} + // Calculate Edge Weights using "Return Heuristics". Predict a successor which // leads directly to Return Instruction will not be taken. -void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ +bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ if (BB->getTerminator()->getNumSuccessors() == 1) - return; + return false; + + SmallPtrSet<BasicBlock *, 4> ReturningEdges; + SmallPtrSet<BasicBlock *, 4> StayEdges; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; - if (isReturningBlock(Succ)) { - decEdgeWeight(BB, Succ); + if (isReturningBlock(Succ)) + ReturningEdges.insert(Succ); + else + StayEdges.insert(Succ); + } + + if (uint32_t numStayEdges = StayEdges.size()) { + uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges; + if (stayWeight < NORMAL_WEIGHT) + stayWeight = NORMAL_WEIGHT; + + for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(), + E = StayEdges.end(); I != E; ++I) + BP->setEdgeWeight(BB, *I, stayWeight); + } + + if (uint32_t numRetEdges = ReturningEdges.size()) { + uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges; + if (retWeight < MIN_WEIGHT) + retWeight = MIN_WEIGHT; + for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(), + E = ReturningEdges.end(); I != E; ++I) { + BP->setEdgeWeight(BB, *I, retWeight); } } + + return ReturningEdges.size() > 0; } // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. -void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { +bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) - return; + return false; Value *Cond = BI->getCondition(); ICmpInst *CI = dyn_cast<ICmpInst>(Cond); if (!CI || !CI->isEquality()) - return; + return false; Value *LHS = CI->getOperand(0); if (!LHS->getType()->isPointerTy()) - return; + return false; assert(CI->getOperand(1)->getType()->isPointerTy()); @@ -190,29 +249,35 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - incEdgeWeight(BB, Taken); - decEdgeWeight(BB, NonTaken); + BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); + BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. -void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { +bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); Loop *L = LI->getLoopFor(BB); if (!L) - return; + return false; + + SmallPtrSet<BasicBlock *, 8> BackEdges; + SmallPtrSet<BasicBlock *, 8> ExitingEdges; + SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop. 
- SmallVector<BasicBlock *, 8> BackEdges; - SmallVector<BasicBlock *, 8> ExitingEdges; + bool isHeader = BB == L->getHeader(); for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; Loop *SuccL = LI->getLoopFor(Succ); if (SuccL != L) - ExitingEdges.push_back(Succ); + ExitingEdges.insert(Succ); else if (Succ == L->getHeader()) - BackEdges.push_back(Succ); + BackEdges.insert(Succ); + else if (isHeader) + InEdges.insert(Succ); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -220,39 +285,121 @@ void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; BP->setEdgeWeight(BB, Back, backWeight); } } + if (uint32_t numInEdges = InEdges.size()) { + uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; + if (inWeight < NORMAL_WEIGHT) + inWeight = NORMAL_WEIGHT; + + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(), + EE = InEdges.end(); EI != EE; ++EI) { + BasicBlock *Back = *EI; + BP->setEdgeWeight(BB, Back, inWeight); + } + } + uint32_t numExitingEdges = ExitingEdges.size(); if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { BasicBlock *Exiting = *EI; BP->setEdgeWeight(BB, Exiting, exitWeight); } } + + return true; } +bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return false; + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast<ConstantInt>(RHS); + if (!CV) + return false; + + bool isProb; + if (CV->isZero()) { + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + // X == 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_NE: + // X != 0 -> Likely + isProb = true; + break; + case CmpInst::ICMP_SLT: + // X < 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_SGT: + // X > 0 -> Likely + isProb = true; + break; + default: + return false; + } + } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { + // InstCombine canonicalizes X <= 0 into X < 1. + // X <= 0 -> Unlikely + isProb = false; + } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) { + // InstCombine canonicalizes X >= 0 into X > -1. + // X >= 0 -> Likely + isProb = true; + } else { + return false; + } + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + if (!isProb) + std::swap(Taken, NonTaken); + + BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); + BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + + return true; +} + + bool BranchProbabilityAnalysis::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { BasicBlock *BB = I++; - // Only LBH uses setEdgeWeight method. 
- calcLoopBranchHeuristics(BB); + if (calcMetadataWeights(BB)) + continue; + + if (calcLoopBranchHeuristics(BB)) + continue; - // PH and RH use only incEdgeWeight and decEwdgeWeight methods to - // not efface LBH results. - calcPointerHeuristics(BB); - calcReturnHeuristics(BB); + if (calcReturnHeuristics(BB)) + continue; + + if (calcPointerHeuristics(BB)) + continue; + + calcZeroHeuristics(BB); } return false; @@ -269,11 +416,11 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { return BPA.runOnFunction(F); } -uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { +uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; + for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + const BasicBlock *Succ = *I; uint32_t Weight = getEdgeWeight(BB, Succ); uint32_t PrevSum = Sum; @@ -284,7 +431,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { return Sum; } -bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const { +bool BranchProbabilityInfo:: +isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% uint32_t Weight = getEdgeWeight(Src, Dst); uint32_t Sum = getSumForBlock(Src); @@ -321,8 +469,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { } // Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. -uint32_t -BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { +uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { Edge E(Src, Dst); DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); @@ -332,8 +480,8 @@ BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { return DEFAULT_WEIGHT; } -void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, - uint32_t Weight) { +void BranchProbabilityInfo:: +setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { Weights[std::make_pair(Src, Dst)] = Weight; DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " << Dst->getNameStr() << " weight to " << Weight @@ -342,7 +490,7 @@ void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, BranchProbability BranchProbabilityInfo:: -getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const { +getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { uint32_t N = getEdgeWeight(Src, Dst); uint32_t D = getSumForBlock(Src); diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 7fca17e..df79849 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -43,11 +43,16 @@ using namespace llvm; /// FoldBitCast - Constant fold bitcast, symbolically evaluating it with /// TargetData. This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. -static Constant *FoldBitCast(Constant *C, const Type *DestTy, +static Constant *FoldBitCast(Constant *C, Type *DestTy, const TargetData &TD) { - - // This only handles casts to vectors currently. - const VectorType *DestVTy = dyn_cast<VectorType>(DestTy); + // Catch the obvious splat cases. 
+ if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) + return Constant::getAllOnesValue(DestTy); + + // The code below only handles casts to vectors currently. + VectorType *DestVTy = dyn_cast<VectorType>(DestTy); if (DestVTy == 0) return ConstantExpr::getBitCast(C, DestTy); @@ -69,8 +74,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - const Type *SrcEltTy = CV->getType()->getElementType(); - const Type *DstEltTy = DestVTy->getElementType(); + Type *SrcEltTy = CV->getType()->getElementType(); + Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which // requires endianness information to do the right thing. For example, @@ -85,7 +90,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); - const Type *DestIVTy = + Type *DestIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); @@ -99,7 +104,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // it to integer first. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - const Type *SrcIVTy = + Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); @@ -212,11 +217,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (!CI) return false; // Index isn't a simple constant? if (CI->isZero()) continue; // Not adding anything. - if (const StructType *ST = dyn_cast<StructType>(*GTI)) { + if (StructType *ST = dyn_cast<StructType>(*GTI)) { // N = N + Offset Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); } else { - const SequentialType *SQT = cast<SequentialType>(*GTI); + SequentialType *SQT = cast<SequentialType>(*GTI); Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); } } @@ -354,8 +359,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, const TargetData &TD) { - const Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); - const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); // If this isn't an integer load we can't fold it directly. if (!IntType) { @@ -363,7 +368,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // and then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. 
- const Type *MapTy; + Type *MapTy; if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) @@ -443,7 +448,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, std::string Str; if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { unsigned StrLen = Str.length(); - const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); + Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); // Replace load with immediate integer if the result is an integer or fp // value. @@ -478,7 +483,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - const Type *ResTy = cast<PointerType>(C->getType())->getElementType(); + Type *ResTy = cast<PointerType>(C->getType())->getElementType(); if (GV->getInitializer()->isNullValue()) return Constant::getNullValue(ResTy); if (isa<UndefValue>(GV->getInitializer())) @@ -536,19 +541,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// CastGEPIndices - If array indices are not pointer-sized integers, /// explicitly cast them so that they aren't implicitly casted by the /// getelementptr. -static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, - const Type *ResultTy, +static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, + Type *ResultTy, const TargetData *TD) { if (!TD) return 0; - const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); bool Any = false; SmallVector<Constant*, 32> NewIdxs; - for (unsigned i = 1; i != NumOps; ++i) { + for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(), - reinterpret_cast<Value *const *>(Ops+1), - i-1))) && + Ops.slice(1, i-1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -562,7 +566,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, if (!Any) return 0; Constant *C = - ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size()); + ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; @@ -571,23 +575,23 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. 
-static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, - const Type *ResultTy, +static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, + Type *ResultTy, const TargetData *TD) { Constant *Ptr = Ops[0]; if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) return 0; - const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' - for (unsigned i = 1; i != NumOps; ++i) + for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa<ConstantInt>(Ops[i])) { // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" - if (NumOps == 2 && + if (Ops.size() == 2 && cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); assert((CE == 0 || CE->getType() == IntPtrTy) && @@ -606,9 +610,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, } unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); - APInt Offset = APInt(BitWidth, - TD->getIndexedOffset(Ptr->getType(), - (Value**)Ops+1, NumOps-1)); + APInt Offset = + APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), + makeArrayRef((Value **)Ops.data() + 1, + Ops.size() - 1))); Ptr = cast<Constant>(Ptr->stripPointerCasts()); // If this is a GEP of a GEP, fold it all into a single GEP. @@ -627,9 +632,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, Ptr = cast<Constant>(GEP->getOperand(0)); Offset += APInt(BitWidth, - TD->getIndexedOffset(Ptr->getType(), - (Value**)NestedOps.data(), - NestedOps.size())); + TD->getIndexedOffset(Ptr->getType(), NestedOps)); Ptr = cast<Constant>(Ptr->stripPointerCasts()); } @@ -649,10 +652,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // we eliminate over-indexing of the notional static type array bounds. // This makes it easy to determine if the getelementptr is "inbounds". // Also, this helps GlobalOpt do SROA on GlobalVariables. - const Type *Ty = Ptr->getType(); + Type *Ty = Ptr->getType(); SmallVector<Constant*, 32> NewIdxs; do { - if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { if (ATy->isPointerTy()) { // The only pointer indexing we'll do is on the first index of the GEP. if (!NewIdxs.empty()) @@ -665,7 +668,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -679,7 +682,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); } Ty = ATy->getElementType(); - } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { // Determine which field of the struct the offset points into. 
The // getZExtValue is at least as safe as the StructLayout API because we // know the offset is within the struct at this point. @@ -703,7 +706,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Create a GEP. Constant *C = - ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + ConstantExpr::getGetElementPtr(Ptr, NewIdxs); assert(cast<PointerType>(C->getType())->getElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -778,8 +781,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { cast<Constant>(EVI->getAggregateOperand()), EVI->getIndices()); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Ops.data(), Ops.size(), TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression @@ -800,8 +802,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], TD); - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), - Ops.data(), Ops.size(), TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -814,8 +815,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. /// -Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, - Constant* const* Ops, unsigned NumOps, +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, + ArrayRef<Constant *> Ops, const TargetData *TD) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { @@ -831,9 +832,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ICmp: case Instruction::FCmp: assert(0 && "Invalid for compares"); case Instruction::Call: - if (Function *F = dyn_cast<Function>(Ops[NumOps - 1])) + if (Function *F = dyn_cast<Function>(Ops.back())) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops, NumOps - 1); + return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1)); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing @@ -887,12 +888,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD)) + if (Constant *C = CastGEPIndices(Ops, DestTy, TD)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD)) return C; - return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); } } @@ -912,7 +913,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. 
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { if (TD && Ops1->isNullValue()) { - const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the // proper extension or truncation. @@ -934,7 +935,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the @@ -967,7 +968,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; - return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD); } } @@ -987,7 +988,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, // addressing... gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); for (++I; I != E; ++I) - if (const StructType *STy = dyn_cast<StructType>(*I)) { + if (StructType *STy = dyn_cast<StructType>(*I)) { ConstantInt *CU = cast<ConstantInt>(I.getOperand()); assert(CU->getZExtValue() < STy->getNumElements() && "Struct index out of range!"); @@ -1002,7 +1003,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, return 0; } } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) { + if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) { if (CI->getZExtValue() >= ATy->getNumElements()) return 0; if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) @@ -1013,7 +1014,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, C = UndefValue::get(ATy->getElementType()); else return 0; - } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) { + } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) { if (CI->getZExtValue() >= VTy->getNumElements()) return 0; if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) @@ -1101,7 +1102,7 @@ llvm::canConstantFoldCallTo(const Function *F) { } static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - const Type *Ty) { + Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V); if (sys::llvm_fenv_testexcept()) { @@ -1118,7 +1119,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), - double V, double W, const Type *Ty) { + double V, double W, Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V, W); if (sys::llvm_fenv_testexcept()) { @@ -1143,7 +1144,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), /// performed, otherwise returns the Constant value resulting from the /// conversion. static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, - const Type *Ty) { + Type *Ty) { assert(Op && "Called with NULL operand"); APFloat Val(Op->getValueAPF()); @@ -1167,13 +1168,12 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
Constant * -llvm::ConstantFoldCall(Function *F, - Constant *const *Operands, unsigned NumOperands) { +llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { if (!F->hasName()) return 0; StringRef Name = F->getName(); - const Type *Ty = F->getReturnType(); - if (NumOperands == 1) { + Type *Ty = F->getReturnType(); + if (Operands.size() == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); @@ -1327,7 +1327,7 @@ llvm::ConstantFoldCall(Function *F, return 0; } - if (NumOperands == 2) { + if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp index ac5eeeb..bfa429d 100644 --- a/contrib/llvm/lib/Analysis/DIBuilder.cpp +++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp @@ -29,14 +29,74 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { } DIBuilder::DIBuilder(Module &m) - : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} + : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0), + TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0), + ValueFn(0) +{} + +/// finalize - Construct any deferred debug info descriptors. +void DIBuilder::finalize() { + DIArray Enums = getOrCreateArray(AllEnumTypes); + DIType(TempEnumTypes).replaceAllUsesWith(Enums); + + DIArray RetainTypes = getOrCreateArray(AllRetainTypes); + DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); + + DIArray SPs = getOrCreateArray(AllSubprograms); + DIType(TempSubprograms).replaceAllUsesWith(SPs); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) { + SmallVector<Value *, 4> Variables; + for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii) + Variables.push_back(NMD->getOperand(ii)); + if (MDNode *Temp = SP.getVariablesNodes()) { + DIArray AV = getOrCreateArray(Variables); + DIType(Temp).replaceAllUsesWith(AV); + } + NMD->eraseFromParent(); + } + } + + DIArray GVs = getOrCreateArray(AllGVs); + DIType(TempGVs).replaceAllUsesWith(GVs); +} + +/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return +/// N. +static MDNode *getNonCompileUnitScope(MDNode *N) { + if (DIDescriptor(N).isCompileUnit()) + return NULL; + return N; +} /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. 
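[editor's note] The new DIBuilder::finalize() above defers the enum-type, retained-type, subprogram and global lists to the end of the compile, so a front end is now expected to flush the builder once module construction is done. A rough usage sketch; emitDebugInfo, the file names and the producer string are made up for illustration:

    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/Module.h"
    #include "llvm/Support/Dwarf.h"

    using namespace llvm;

    void emitDebugInfo(Module &M) {
      DIBuilder DIB(M);
      DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "my-frontend",
                            /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0);
      // ... createFile / createBasicType / createFunction calls go here ...

      // New in this revision: resolve the temporary MDNodes that anchor the
      // enum types, retained types, subprograms and globals on the CU.
      DIB.finalize();
    }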
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, StringRef Producer, - bool isOptimized, StringRef Flags, +void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, unsigned RunTimeVer) { + assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 + && "Invalid Language tag"); + assert (!Filename.empty() + && "Unable to create compile unit without filename"); + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + TempEnumTypes = MDNode::getTemporary(VMContext, TElts); + Value *THElts[] = { TempEnumTypes }; + MDNode *EnumHolder = MDNode::get(VMContext, THElts); + + TempRetainTypes = MDNode::getTemporary(VMContext, TElts); + Value *TRElts[] = { TempRetainTypes }; + MDNode *RetainHolder = MDNode::get(VMContext, TRElts); + + TempSubprograms = MDNode::getTemporary(VMContext, TElts); + Value *TSElts[] = { TempSubprograms }; + MDNode *SPHolder = MDNode::get(VMContext, TSElts); + + TempGVs = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { TempGVs }; + MDNode *GVHolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -48,7 +108,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), + EnumHolder, + RetainHolder, + SPHolder, + GVHolder }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); @@ -61,17 +125,19 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); + assert(!Filename.empty() && "Unable to create file without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), MDString::get(VMContext, Filename), MDString::get(VMContext, Directory), - TheCU + NULL // TheCU }; return DIFile(MDNode::get(VMContext, Elts)); } /// createEnumerator - Create a single enumerator value. DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { + assert(!Name.empty() && "Unable to create enumerator without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -80,16 +146,37 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { return DIEnumerator(MDNode::get(VMContext, Elts)); } -/// createBasicType - Create debugging information entry for a basic +/// createNullPtrType - Create C++0x nullptr type. +DIType DIBuilder::createNullPtrType(StringRef Name) { + assert(!Name.empty() && "Unable to create type without name"); + // nullptr is encoded in DIBasicType format. Line number, filename, + // ,size, alignment, offset and flags are always empty here. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), + NULL, //TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Encoding + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createBasicType - Create debugging information entry for a basic /// type, e.g 'char'. -DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding) { + assert(!Name.empty() && "Unable to create type without name"); // Basic types are encoded in DIBasicType format. Line number, filename, // offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, Name), NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -108,7 +195,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), - TheCU, + NULL, //TheCU, MDString::get(VMContext, StringRef()), // Empty name. NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -127,7 +214,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, Name), NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -142,10 +229,11 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, /// createReferenceType - Create debugging information entry for a reference. DIType DIBuilder::createReferenceType(DIType RTy) { + assert(RTy.Verify() && "Unable to create reference type"); // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), - TheCU, + NULL, // TheCU, NULL, // Name NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -165,7 +253,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), @@ -199,9 +287,10 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { } /// createInheritance - Create debugging information entry to establish -/// inheritnace relationship between two types. -DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, +/// inheritance relationship between two types. +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { + assert(Ty.Verify() && "Unable to create inheritance"); // TAG_inheritance is encoded in DIDerivedType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), @@ -219,15 +308,15 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, } /// createMemberType - Create debugging information entry for a member. -DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, + uint64_t OffsetInBits, unsigned Flags, DIType Ty) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -242,17 +331,17 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. -DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, + uint64_t OffsetInBits, unsigned Flags, DIType Ty, StringRef PropertyName, StringRef GetterName, StringRef SetterName, unsigned PropertyAttributes) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), - File, // Or TheCU ? Ty ? + getNonCompileUnitScope(File), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -270,8 +359,8 @@ DIType DIBuilder::createObjCIVar(StringRef Name, } /// createClassType - Create debugging information entry for a class. -DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, @@ -279,7 +368,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -298,13 +387,13 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, /// createTemplateTypeParameter - Create debugging information for template /// type parameter. -DITemplateTypeParameter +DITemplateTypeParameter DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, DIType Ty, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), Ty, File, @@ -316,14 +405,14 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, /// createTemplateValueParameter - Create debugging information for template /// value parameter. 
-DITemplateValueParameter +DITemplateValueParameter DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, DIType Ty, uint64_t Val, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), Ty, ConstantInt::get(Type::getInt64Ty(VMContext), Val), @@ -335,15 +424,15 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, } /// createStructType - Create debugging information entry for a struct. -DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - unsigned Flags, DIArray Elements, + unsigned Flags, DIArray Elements, unsigned RunTimeLang) { // TAG_structure_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -360,7 +449,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, } /// createUnionType - Create debugging information entry for an union. -DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, +DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, @@ -368,7 +457,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -389,9 +478,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), - File, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), MDString::get(VMContext, ""), - File, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -405,16 +494,17 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { return DIType(MDNode::get(VMContext, Elts)); } -/// createEnumerationType - Create debugging information entry for an +/// createEnumerationType - Create debugging information entry for an /// enumeration. -DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, DIArray Elements) { +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + DIArray Elements) { // TAG_enumeration_type is encoded in DICompositeType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -428,20 +518,19 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; MDNode *Node = MDNode::get(VMContext, Elts); - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); - NMD->addOperand(Node); + AllEnumTypes.push_back(Node); return DIType(Node); } /// createArrayType - Create debugging information entry for an array. -DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, ""), - TheCU, + NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -456,14 +545,14 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, } /// createVectorType - Create debugging information entry for a vector. -DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, +DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_vector_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_vector_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, ""), - TheCU, + NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -501,18 +590,17 @@ DIType DIBuilder::createArtificialType(DIType Ty) { return DIType(MDNode::get(VMContext, Elts)); } -/// retainType - Retain DIType in a module even if it is not referenced +/// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. void DIBuilder::retainType(DIType T) { - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); - NMD->addOperand(T); + AllRetainTypes.push_back(T); } /// createUnspecifiedParameter - Create unspeicified type descriptor /// for the subroutine type. DIDescriptor DIBuilder::createUnspecifiedParameter() { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; return DIDescriptor(MDNode::get(VMContext, Elts)); } @@ -532,7 +620,7 @@ DIType DIBuilder::createTemporaryType(DIFile F) { // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type), - F.getCompileUnit(), + TheCU, NULL, F }; @@ -563,12 +651,12 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { /// createGlobalVariable - Create a new descriptor for the specified global. 
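[editor's note] With the named-metadata anchors (llvm.dbg.enum, llvm.dbg.ty, llvm.dbg.gv, llvm.dbg.sp) being retired in favour of lists hung off the compile unit, a consumer now walks llvm.dbg.cu and the DICompileUnit accessors added further down in the DebugInfo.cpp hunks. A sketch of that traversal, assuming finalize() has already run; dumpCUContents is a hypothetical name:

    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    void dumpCUContents(const Module &M) {
      NamedMDNode *CUs = M.getNamedMetadata("llvm.dbg.cu");
      if (!CUs) return;                              // no debug info in this module
      for (unsigned i = 0, e = CUs->getNumOperands(); i != e; ++i) {
        DICompileUnit CU(CUs->getOperand(i));
        DIArray SPs = CU.getSubprograms();           // added later in this patch
        for (unsigned j = 0, je = SPs.getNumElements(); j != je; ++j)
          errs() << "subprogram: "
                 << DISubprogram(SPs.getElement(j)).getName() << '\n';
        DIArray GVs = CU.getGlobalVariables();
        for (unsigned j = 0, je = GVs.getNumElements(); j != je; ++j)
          errs() << "global: "
                 << DIGlobalVariable(GVs.getElement(j)).getName() << '\n';
      }
    }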
DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - TheCU, + NULL, // TheCU, MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, Name), @@ -580,22 +668,20 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, Val }; MDNode *Node = MDNode::get(VMContext, Elts); - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); + AllGVs.push_back(Node); return DIGlobalVariable(Node); } /// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: -createStaticVariable(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, unsigned LineNumber, +createStaticVariable(DIDescriptor Context, StringRef Name, + StringRef LinkageName, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -607,26 +693,25 @@ createStaticVariable(DIDescriptor Context, StringRef Name, Val }; MDNode *Node = MDNode::get(VMContext, Elts); - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); + AllGVs.push_back(Node); return DIGlobalVariable(Node); } /// createVariable - Create a new descriptor for the specified variable. DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNo, DIType Ty, + unsigned LineNo, DIType Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { Value *Elts[] = { GetTagConstant(VMContext, Tag), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))), Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags) + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Constant::getNullValue(Type::getInt32Ty(VMContext)), }; MDNode *Node = MDNode::get(VMContext, Elts); if (AlwaysPreserve) { @@ -634,13 +719,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, // to preserve variable info in such situation then stash it in a // named mdnode. 
DISubprogram Fn(getDISubprogram(Scope)); - StringRef FName = "fn"; - if (Fn.getFunction()) - FName = Fn.getFunction()->getName(); - char One = '\1'; - if (FName.startswith(StringRef(&One, 1))) - FName = FName.substr(1); - NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName); + NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn); FnLocals->addOperand(Node); } return DIVariable(Node); @@ -655,12 +734,14 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, unsigned ArgNo) { SmallVector<Value *, 15> Elts; Elts.push_back(GetTagConstant(VMContext, Tag)); - Elts.push_back(Scope); + Elts.push_back(getNonCompileUnitScope(Scope)), Elts.push_back(MDString::get(VMContext, Name)); Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), + (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); Elts.append(Addr.begin(), Addr.end()); return DIVariable(MDNode::get(VMContext, Elts)); @@ -677,10 +758,15 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, Function *Fn, MDNode *TParams, MDNode *Decl) { + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Temp = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { Temp }; + MDNode *THolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -696,13 +782,13 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParams, - Decl + Decl, + THolder }; MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); + AllSubprograms.push_back(Node); return DISubprogram(Node); } @@ -720,10 +806,15 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, bool isOptimized, Function *Fn, MDNode *TParam) { + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Temp = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { Temp }; + MDNode *THolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -739,12 +830,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParam, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + THolder }; MDNode *Node = MDNode::get(VMContext, Elts); - - // Create a named metadata so that we do not lose this mdnode. 
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); return DISubprogram(Node); } @@ -754,7 +843,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) @@ -762,13 +851,25 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, return DINameSpace(MDNode::get(VMContext, Elts)); } +/// createLexicalBlockFile - This creates a new MDNode that encapsulates +/// an existing scope with a new filename. +DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, + DIFile File) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + Scope, + File + }; + return DILexicalBlockFile(MDNode::get(VMContext, Elts)); +} + DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { // Defeat MDNode uniqing for lexical blocks by using unique id. static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), - Scope, + getNonCompileUnitScope(Scope), ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt32Ty(VMContext), Col), File, @@ -836,4 +937,3 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, VarInfo }; return CallInst::Create(ValueFn, Args, "", InsertAtEnd); } - diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp index b23c351..cd832ab 100644 --- a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp +++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp @@ -171,7 +171,7 @@ static bool getLocationInfo(const Value *V, std::string &DisplayName, void PrintDbgInfo::printVariableDeclaration(const Value *V) { std::string DisplayName, File, Directory, Type; - unsigned LineNo; + unsigned LineNo = 0; if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) return; diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp index b42e946..44457d3 100644 --- a/contrib/llvm/lib/Analysis/DebugInfo.cpp +++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp @@ -39,6 +39,9 @@ DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { } +DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) { +} + DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { } @@ -111,9 +114,17 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const { unsigned DIVariable::getNumAddrElements() const { if (getVersion() <= llvm::LLVMDebugVersion8) return DbgNode->getNumOperands()-6; - return DbgNode->getNumOperands()-7; + if (getVersion() == llvm::LLVMDebugVersion9) + return DbgNode->getNumOperands()-7; + return DbgNode->getNumOperands()-8; } +/// getInlinedAt - If this variable is inlined then return inline location. +MDNode *DIVariable::getInlinedAt() const { + if (getVersion() <= llvm::LLVMDebugVersion9) + return NULL; + return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)); +} //===----------------------------------------------------------------------===// // Predicates @@ -122,7 +133,14 @@ unsigned DIVariable::getNumAddrElements() const { /// isBasicType - Return true if the specified tag is legal for /// DIBasicType. 
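[editor's note] createLexicalBlockFile above introduces a three-operand DW_TAG_lexical_block node that only swaps the file (for code pulled in via #include); the isLexicalBlockFile/isLexicalBlock predicates in the next hunks tell the two forms apart by operand count. Code that walks scope chains has to unwrap the wrapper, as the getDISubprogram and DebugInfoFinder hunks later in this patch do. A hedged sketch of the same pattern; enclosingSubprogram is a hypothetical helper:

    #include "llvm/Analysis/DebugInfo.h"

    using namespace llvm;

    // Walk a scope chain up to the enclosing subprogram, stepping through
    // both real lexical blocks and the new file-only wrappers.
    static DISubprogram enclosingSubprogram(DIScope S) {
      while (S.Verify()) {
        if (S.isSubprogram())
          return DISubprogram(S);
        if (S.isLexicalBlockFile())                 // new 3-operand wrapper
          S = DILexicalBlockFile(S).getContext();
        else if (S.isLexicalBlock())
          S = DILexicalBlock(S).getContext();
        else
          break;                                    // namespace, CU, ...
      }
      return DISubprogram();
    }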
bool DIDescriptor::isBasicType() const { - return DbgNode && getTag() == dwarf::DW_TAG_base_type; + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_unspecified_type: + return true; + default: + return false; + } } /// isDerivedType - Return true if the specified tag is legal for DIDerivedType. @@ -248,9 +266,17 @@ bool DIDescriptor::isNameSpace() const { return DbgNode && getTag() == dwarf::DW_TAG_namespace; } +/// isLexicalBlockFile - Return true if the specified descriptor is a +/// lexical block with an extra file. +bool DIDescriptor::isLexicalBlockFile() const { + return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && + (DbgNode->getNumOperands() == 3); +} + /// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. bool DIDescriptor::isLexicalBlock() const { - return DbgNode && getTag() == dwarf::DW_TAG_lexical_block; + return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && + (DbgNode->getNumOperands() > 3); } /// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. @@ -320,6 +346,22 @@ void DIType::replaceAllUsesWith(MDNode *D) { } } +/// isUnsignedDIType - Return true if type encoding is unsigned. +bool DIType::isUnsignedDIType() { + DIDerivedType DTy(DbgNode); + if (DTy.Verify()) + return DTy.getTypeDerivedFrom().isUnsignedDIType(); + + DIBasicType BTy(DbgNode); + if (BTy.Verify()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char) + return true; + } + return false; +} + /// Verify - Verify that a compile unit is well formed. bool DICompileUnit::Verify() const { if (!DbgNode) @@ -335,7 +377,7 @@ bool DICompileUnit::Verify() const { bool DIType::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) + if (getContext() && !getContext().Verify()) return false; unsigned Tag = getTag(); if (!isBasicType() && Tag != dwarf::DW_TAG_const_type && @@ -343,6 +385,7 @@ bool DIType::Verify() const { Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type && Tag != dwarf::DW_TAG_enumeration_type + && Tag != dwarf::DW_TAG_subroutine_type && getFilename().empty()) return false; return true; @@ -362,12 +405,9 @@ bool DIDerivedType::Verify() const { bool DICompositeType::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) + if (getContext() && !getContext().Verify()) return false; - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) - return false; return true; } @@ -376,11 +416,7 @@ bool DISubprogram::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) - return false; - - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (getContext() && !getContext().Verify()) return false; DICompositeType Ty = getType(); @@ -397,11 +433,7 @@ bool DIGlobalVariable::Verify() const { if (getDisplayName().empty()) return false; - if (!getContext().Verify()) - return false; - - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (getContext() && !getContext().Verify()) return false; DIType Ty = getType(); @@ -419,10 +451,7 @@ bool DIVariable::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) - return false; - - if (!getCompileUnit().Verify()) + if (getContext() && !getContext().Verify()) return false; DIType Ty = getType(); @@ -446,8 +475,6 @@ bool DINameSpace::Verify() const { return false; if (getName().empty()) return false; - if 
(!getCompileUnit().Verify()) - return false; return true; } @@ -504,9 +531,28 @@ unsigned DISubprogram::isOptimized() const { return 0; } +MDNode *DISubprogram::getVariablesNodes() const { + if (!DbgNode || DbgNode->getNumOperands() <= 19) + return NULL; + if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + return dyn_cast_or_null<MDNode>(Temp->getOperand(0)); + return NULL; +} + +DIArray DISubprogram::getVariables() const { + if (!DbgNode || DbgNode->getNumOperands() <= 19) + return DIArray(); + if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0))) + return DIArray(A); + return DIArray(); +} + StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); + if (isLexicalBlockFile()) + return DILexicalBlockFile(DbgNode).getFilename(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getFilename(); if (isSubprogram()) @@ -526,6 +572,8 @@ StringRef DIScope::getFilename() const { StringRef DIScope::getDirectory() const { if (!DbgNode) return StringRef(); + if (isLexicalBlockFile()) + return DILexicalBlockFile(DbgNode).getDirectory(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getDirectory(); if (isSubprogram()) @@ -542,6 +590,47 @@ StringRef DIScope::getDirectory() const { return StringRef(); } +DIArray DICompileUnit::getEnumTypes() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + +DIArray DICompileUnit::getRetainedTypes() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + +DIArray DICompileUnit::getSubprograms() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + + +DIArray DICompileUnit::getGlobalVariables() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + //===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. 
//===----------------------------------------------------------------------===// @@ -573,7 +662,6 @@ void DIType::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << "line " << getLineNumber() << ", " << getSizeInBits() << " bits, " @@ -629,7 +717,6 @@ void DISubprogram::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) @@ -652,7 +739,6 @@ void DIGlobalVariable::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) @@ -666,13 +752,48 @@ void DIGlobalVariable::print(raw_ostream &OS) const { OS << "]\n"; } +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; + } + } +} + +void DIVariable::printExtendedName(raw_ostream &OS) const { + const LLVMContext &Ctx = DbgNode->getContext(); + StringRef Res = getName(); + if (!Res.empty()) + OS << Res << "," << getLineNumber(); + if (MDNode *InlinedAt = getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + /// print - Print variable. void DIVariable::print(raw_ostream &OS) const { StringRef Res = getName(); if (!Res.empty()) OS << " [" << Res << "] "; - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; getType().print(OS); OS << "\n"; @@ -744,22 +865,61 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { /// getFnSpecificMDNode - Return a NameMDNode, if available, that is /// suitable to hold function specific information. -NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) { +NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) { SmallString<32> Name = StringRef("llvm.dbg.lv."); - fixupObjcLikeName(FuncName, Name); - + StringRef FName = "fn"; + if (Fn.getFunction()) + FName = Fn.getFunction()->getName(); + else + FName = Fn.getName(); + char One = '\1'; + if (FName.startswith(StringRef(&One, 1))) + FName = FName.substr(1); + fixupObjcLikeName(FName, Name); return M.getNamedMetadata(Name.str()); } /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable /// to hold function specific information. 
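[editor's note] getFnSpecificMDNode/getOrInsertFnSpecificMDNode now key the per-function "llvm.dbg.lv.<name>" node off a DISubprogram instead of a raw StringRef, so callers no longer duplicate the '\1'-prefix stripping. A sketch of a consumer pulling the recorded locals for one subprogram; collectLocals is purely illustrative:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/Module.h"

    using namespace llvm;

    // Collect the DIVariables recorded for a given subprogram, if any.
    static void collectLocals(const Module &M, DISubprogram SP,
                              SmallVectorImpl<DIVariable> &Vars) {
      if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP))
        for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
          Vars.push_back(DIVariable(NMD->getOperand(i)));
    }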
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) { +NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) { SmallString<32> Name = StringRef("llvm.dbg.lv."); - fixupObjcLikeName(FuncName, Name); - + StringRef FName = "fn"; + if (Fn.getFunction()) + FName = Fn.getFunction()->getName(); + else + FName = Fn.getName(); + char One = '\1'; + if (FName.startswith(StringRef(&One, 1))) + FName = FName.substr(1); + fixupObjcLikeName(FName, Name); + return M.getOrInsertNamedMetadata(Name.str()); } +/// createInlinedVariable - Create a new inlined variable based on current +/// variable. +/// @param DV Current Variable. +/// @param InlinedScope Location at current variable is inlined. +DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope, + LLVMContext &VMContext) { + SmallVector<Value *, 16> Elts; + // Insert inlined scope as 7th element. + for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i) + i == 7 ? Elts.push_back(InlinedScope) : + Elts.push_back(DV->getOperand(i)); + return DIVariable(MDNode::get(VMContext, Elts)); +} + +/// cleanseInlinedVariable - Remove inlined scope from the variable. +DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { + SmallVector<Value *, 16> Elts; + // Insert inlined scope as 7th element. + for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i) + i == 7 ? + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))): + Elts.push_back(DV->getOperand(i)); + return DIVariable(MDNode::get(VMContext, Elts)); +} //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. @@ -767,6 +927,10 @@ NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) { /// processModule - Process entire module and collect debug info. 
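[editor's note] createInlinedVariable/cleanseInlinedVariable above give the inliner a way to stamp a variable descriptor with the scope it was inlined at (operand 7 of the new-format node) and to strip that stamp again. Roughly, per inlined call site one would do something like the following; tagInlined and InlinedAtScope are placeholders, not part of the patch:

    #include "llvm/Analysis/DebugInfo.h"

    using namespace llvm;

    // Re-target a variable descriptor at the location it was inlined to.
    static DIVariable tagInlined(MDNode *OrigVar, MDNode *InlinedAtScope,
                                 LLVMContext &Ctx) {
      // Operand 7 of the new-format variable node holds the inlined-at scope;
      // cleanseInlinedVariable(OrigVar, Ctx) would null it back out.
      return createInlinedVariable(OrigVar, InlinedAtScope, Ctx);
    }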
void DebugInfoFinder::processModule(Module &M) { + if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) + addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i))); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; @@ -785,6 +949,10 @@ void DebugInfoFinder::processModule(Module &M) { addCompileUnit(DICompileUnit(Scope)); else if (Scope.isSubprogram()) processSubprogram(DISubprogram(Scope)); + else if (Scope.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(Scope); + processLexicalBlock(DILexicalBlock(DBF.getScope())); + } else if (Scope.isLexicalBlock()) processLexicalBlock(DILexicalBlock(Scope)); @@ -796,7 +964,8 @@ void DebugInfoFinder::processModule(Module &M) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); if (addGlobalVariable(DIG)) { - addCompileUnit(DIG.getCompileUnit()); + if (DIG.getVersion() <= LLVMDebugVersion10) + addCompileUnit(DIG.getCompileUnit()); processType(DIG.getType()); } } @@ -817,6 +986,10 @@ void DebugInfoFinder::processLocation(DILocation Loc) { processSubprogram(DISubprogram(S)); else if (S.isLexicalBlock()) processLexicalBlock(DILexicalBlock(S)); + else if (S.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(S); + processLexicalBlock(DILexicalBlock(DBF.getScope())); + } processLocation(Loc.getOrigLocation()); } @@ -824,8 +997,8 @@ void DebugInfoFinder::processLocation(DILocation Loc) { void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) return; - - addCompileUnit(DT.getCompileUnit()); + if (DT.getVersion() <= LLVMDebugVersion10) + addCompileUnit(DT.getCompileUnit()); if (DT.isCompositeType()) { DICompositeType DCT(DT); processType(DCT.getTypeDerivedFrom()); @@ -848,6 +1021,10 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { DIScope Context = LB.getContext(); if (Context.isLexicalBlock()) return processLexicalBlock(DILexicalBlock(Context)); + else if (Context.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(Context); + return processLexicalBlock(DILexicalBlock(DBF.getScope())); + } else return processSubprogram(DISubprogram(Context)); } @@ -856,7 +1033,8 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (!addSubprogram(SP)) return; - addCompileUnit(SP.getCompileUnit()); + if (SP.getVersion() <= LLVMDebugVersion10) + addCompileUnit(SP.getCompileUnit()); processType(SP.getType()); } @@ -871,8 +1049,8 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { if (!NodesSeen.insert(DV)) return; - - addCompileUnit(DIVariable(N).getCompileUnit()); + if (DIVariable(N).getVersion() <= LLVMDebugVersion10) + addCompileUnit(DIVariable(N).getCompileUnit()); processType(DIVariable(N).getType()); } @@ -930,6 +1108,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) { if (D.isSubprogram()) return DISubprogram(Scope); + if (D.isLexicalBlockFile()) + return getDISubprogram(DILexicalBlockFile(Scope).getContext()); + if (D.isLexicalBlock()) return getDISubprogram(DILexicalBlock(Scope).getContext()); @@ -946,3 +1127,17 @@ DICompositeType llvm::getDICompositeType(DIType T) { return DICompositeType(); } + +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context 
nested inside a subprogram. +bool llvm::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext()); + return false; +} + diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index 659ffab..963da75 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -44,8 +44,8 @@ namespace { class CGPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit CGPassManager(int Depth) - : ModulePass(ID), PMDataManager(Depth) { } + explicit CGPassManager() + : ModulePass(ID), PMDataManager() { } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. @@ -350,6 +350,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; } ); + (void)MadeChange; return DevirtualizedCall; } @@ -542,7 +543,7 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS, PMDataManager *PMD = PMS.top(); // [1] Create new Call Graph Pass Manager - CGP = new CGPassManager(PMD->getDepth() + 1); + CGP = new CGPassManager(); // [2] Set up new manager's top level manager PMTopLevelManager *TPM = PMD->getTopLevelManager(); diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp index 6535786..e9df3ca 100644 --- a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp @@ -29,7 +29,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types", // IncorporateType - Incorporate one type and all of its subtypes into the // collection of used types. // -void FindUsedTypes::IncorporateType(const Type *Ty) { +void FindUsedTypes::IncorporateType(Type *Ty) { // If ty doesn't already exist in the used types map, add it now, otherwise // return. if (!UsedTypes.insert(Ty)) return; // Already contain Ty. @@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) { // void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { OS << "Types in use by this module:\n"; - for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(), + for (SetVector<Type *>::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { OS << " " << **I << '\n'; } diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index e5f0a77..d0ca892 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -146,7 +146,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { ISE, User, I, NewUse.PostIncLoops, *SE, *DT); - DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + DEBUG(if (SE->getSCEV(I) != ISE) + dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } } return true; diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index efde598..e12e322 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -24,13 +25,13 @@ using namespace llvm; /// TODO: Perhaps calls like memcpy, strcpy, etc? 
bool llvm::callIsSmall(const Function *F) { if (!F) return false; - + if (F->hasLocalLinkage()) return false; - + if (!F->hasName()) return false; - + StringRef Name = F->getName(); - + // These will all likely lower to a single selection DAG node. if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || Name == "fabs" || Name == "fabsf" || Name == "fabsl" || @@ -38,7 +39,7 @@ bool llvm::callIsSmall(const Function *F) { Name == "cos" || Name == "cosf" || Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) return true; - + // These are all likely to be optimized into something smaller. if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || Name == "exp2l" || Name == "exp2f" || @@ -46,13 +47,14 @@ bool llvm::callIsSmall(const Function *F) { Name == "round" || Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") return true; - + return false; } /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetData *TD) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); @@ -67,8 +69,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ImmutableCallSite CS(cast<Instruction>(II)); if (const Function *F = CS.getCalledFunction()) { - // If a function is both internal and has a single use, then it is - // extremely likely to get inlined in the future (it was probably + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). if (F->hasInternalLinkage() && F->hasOneUse()) ++NumInlineCandidates; @@ -91,20 +93,25 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ++NumCalls; } } - + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (!AI->isStaticAlloca()) this->usesDynamicAlloca = true; } if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) - ++NumVectorInsts; - + ++NumVectorInsts; + if (const CastInst *CI = dyn_cast<CastInst>(II)) { // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI)) continue; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (isa<TruncInst>(CI) && TD && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + continue; // Result of a cmp instruction is often extended (to be used by other // cmp instructions, logical or return instructions). These are usually // nop on most sane targets. @@ -119,10 +126,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ++NumInsts; } - + if (isa<ReturnInst>(BB->getTerminator())) ++NumRets; - + // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this indirect @@ -217,7 +224,7 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. 
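[editor's note] The new TargetData-aware case in analyzeBasicBlock above makes a trunc to a target-legal integer width free for inline-cost purposes (the target is assumed to have compare and shift of that width). Pulled out of the loop, the test looks like this; truncIsFree is a sketch, not a function the patch adds:

    #include "llvm/Instructions.h"
    #include "llvm/Target/TargetData.h"

    using namespace llvm;

    // True if this cast is expected to fold away on the target, so the
    // inline-cost model should not charge an instruction for it.
    static bool truncIsFree(const CastInst *CI, const TargetData *TD) {
      return isa<TruncInst>(CI) && TD &&
             TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()));
    }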
-void CodeMetrics::analyzeFunction(Function *F) { +void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { // If this function contains a call to setjmp or _setjmp, never inline // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they @@ -227,13 +234,14 @@ void CodeMetrics::analyzeFunction(Function *F) { // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB); + analyzeBasicBlock(&*BB, TD); } /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. -void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { - Metrics.analyzeFunction(F); +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, + const TargetData *TD) { + Metrics.analyzeFunction(F, TD); // A function with exactly one return has it removed during the inlining // process (see InlineFunction), so don't count it. @@ -252,7 +260,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { /// NeverInline - returns true if the function should never be inlined into /// any caller bool InlineCostAnalyzer::FunctionInfo::NeverInline() { - return (Metrics.callsSetJmp || Metrics.isRecursive || + return (Metrics.callsSetJmp || Metrics.isRecursive || Metrics.containsIndirectBr); } // getSpecializationBonus - The heuristic used to determine the per-call @@ -263,19 +271,19 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, { if (Callee->mayBeOverridden()) return 0; - + int Bonus = 0; // If this function uses the coldcc calling convention, prefer not to // specialize it. if (Callee->getCallingConv() == CallingConv::Cold) Bonus -= InlineConstants::ColdccPenalty; - + // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); unsigned ArgNo = 0; unsigned i = 0; @@ -286,7 +294,7 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, Bonus += CountBonusForConstant(I); } - // Calls usually take a long time, so they make the specialization gain + // Calls usually take a long time, so they make the specialization gain // smaller. Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; @@ -300,13 +308,13 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, // inlining because we decide we don't want to give a bonus for // devirtualizing. int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { - + // This could just be NULL. if (!C) return 0; - + Function *F = dyn_cast<Function>(C); if (!F) return 0; - + int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); return (Bonus > 0) ? 0 : Bonus; } @@ -355,18 +363,18 @@ int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { Bonus += CountBonusForConstant(&Inst); } } - + return Bonus; } int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); - + CalleeFI->analyzeFunction(Callee, TD); + // InlineCost - This value measures how good of an inline candidate this call // site is to inline. 
A lower inline cost make is more likely for the call to // be inlined. This value may go negative. @@ -392,9 +400,9 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // weights calculated for the callee to determine how much will be folded // away with this information. else if (isa<Constant>(I)) - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; } - + // Each argument passed in has a cost at both the caller and the callee // sides. Measurements show that each argument costs about the same as an // instruction. @@ -408,28 +416,28 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // Look at the size of the callee. Each instruction counts as 5. InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; - + return InlineCost; } int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); - + CalleeFI->analyzeFunction(Callee, TD); + bool isDirectCall = CS.getCalledFunction() == Callee; Instruction *TheCall = CS.getInstruction(); int Bonus = 0; - + // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) Bonus += InlineConstants::LastCallToStaticBonus; - + // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this. @@ -438,12 +446,12 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { Bonus += InlineConstants::NoreturnPenalty; } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) Bonus += InlineConstants::NoreturnPenalty; - + // If this function uses the coldcc calling convention, prefer not to inline // it. if (Callee->getCallingConv() == CallingConv::Cold) Bonus += InlineConstants::ColdccPenalty; - + // Add to the inline quality for properties that make the call valuable to // inline. This includes factors that indicate that the result of inlining // the function will be optimizable. Currently this just looks at arguments @@ -455,7 +463,7 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { // Compute any constant bonus due to inlining we want to give here. if (isa<Constant>(I)) Bonus += CountBonusForConstant(FI, cast<Constant>(I)); - + return Bonus; } @@ -483,10 +491,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); // If we should never inline this, return a huge cost. if (CalleeFI->NeverInline()) @@ -498,15 +506,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // requires handling setjmp somewhere else, however. if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getAlways(); - + if (CalleeFI->Metrics.usesDynamicAlloca) { // Get information about the caller. 
FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. if (CallerFI.Metrics.NumBlocks == 0) { - CallerFI.analyzeFunction(Caller); - + CallerFI.analyzeFunction(Caller, TD); + // Recompute the CalleeFI pointer, getting Caller could have invalidated // it. CalleeFI = &CachedFunctionInfo[Callee]; @@ -538,16 +546,16 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, // something else. if (Callee->mayBeOverridden()) return llvm::InlineCost::getNever(); - + // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); int Cost = 0; - + // Look at the original size of the callee. Each instruction counts as 5. Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; @@ -564,13 +572,13 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, // higher threshold to determine if the function call should be inlined. float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { Function *Callee = CS.getCalledFunction(); - + // Get information about the callee. FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI.Metrics.NumBlocks == 0) - CalleeFI.analyzeFunction(Callee); + CalleeFI.analyzeFunction(Callee, TD); float Factor = 1.0f; // Single BB functions are often written to be inlined. @@ -604,7 +612,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { --CallerMetrics.NumCalls; if (Callee == 0) return; - + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; // If we don't have metrics for the callee, don't recalculate them just to @@ -614,7 +622,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { resetCachedCostInfo(Caller); return; } - + // Since CalleeMetrics were already calculated, we know that the CallerMetrics // reference isn't invalidated: both were in the DenseMap. CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; @@ -636,7 +644,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { CallerMetrics.NumInsts -= Callee->arg_size(); else CallerMetrics.NumInsts = 0; - + // We are not updating the argument weights. We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 8709f6b..131cc97 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -48,6 +48,26 @@ static Value *SimplifyOrInst(Value *, Value *, const TargetData *, static Value *SimplifyXorInst(Value *, Value *, const TargetData *, const DominatorTree *, unsigned); +/// getFalse - For a boolean type, or a vector of boolean type, return false, or +/// a vector with every element false, as appropriate for the type. +static Constant *getFalse(Type *Ty) { + assert((Ty->isIntegerTy(1) || + (Ty->isVectorTy() && + cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + "Expected i1 type or a vector of i1!"); + return Constant::getNullValue(Ty); +} + +/// getTrue - For a boolean type, or a vector of boolean type, return true, or +/// a vector with every element true, as appropriate for the type. 
+static Constant *getTrue(Type *Ty) { + assert((Ty->isIntegerTy(1) || + (Ty->isVectorTy() && + cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + "Expected i1 type or a vector of i1!"); + return Constant::getAllOnesValue(Ty); +} + /// ValueDominatesPHI - Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); @@ -526,7 +546,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -595,7 +615,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // X - undef -> undef @@ -715,7 +735,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -788,7 +808,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -909,7 +929,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -1012,7 +1032,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -1138,7 +1158,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -1227,7 +1247,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -1321,7 +1341,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. 
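The recurring change from "Ops, 2, TD" to "Ops, TD" in these hunks reflects ConstantFoldInstOperands now taking an ArrayRef<Constant*> rather than a pointer-plus-count pair, so a fixed-size local array converts implicitly and the element count can no longer drift out of sync. A minimal caller fragment, with the operand names borrowed from the SimplifyXorInst hunk above (a sketch, not standalone code):

    #include "llvm/Analysis/ConstantFolding.h"
    using namespace llvm;

    // CLHS/CRHS are the two constant operands and TD the optional TargetData,
    // exactly as in the surrounding hunks.
    Constant *Ops[] = { CLHS, CRHS };
    Constant *Folded =
        ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), Ops, TD);
    // ArrayRef carries the length, so the explicit "2" argument is gone.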
@@ -1372,7 +1392,7 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); } -static const Type *GetCompareTy(Value *Op) { +static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -1413,8 +1433,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Pred = CmpInst::getSwappedPredicate(Pred); } - const Type *ITy = GetCompareTy(LHS); // The return type. - const Type *OpTy = LHS->getType(); // The operand type. + Type *ITy = GetCompareTy(LHS); // The return type. + Type *OpTy = LHS->getType(); // The operand type. // icmp X, X -> true/false // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false @@ -1478,48 +1498,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: assert(false && "Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: - // getNullValue also works for vectors, unlike getFalse. - return Constant::getNullValue(ITy); + return getFalse(ITy); case ICmpInst::ICMP_UGE: - // getAllOnesValue also works for vectors, unlike getTrue. - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: if (isKnownNonZero(LHS, TD)) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; case ICmpInst::ICMP_SLT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); if (LHSKnownNonNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_SGE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); if (LHSKnownNonNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; case ICmpInst::ICMP_SGT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; } } @@ -1593,8 +1611,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { Instruction *LI = cast<CastInst>(LHS); Value *SrcOp = LI->getOperand(0); - const Type *SrcTy = SrcOp->getType(); - const Type *DstTy = LI->getType(); + Type *SrcTy = SrcOp->getType(); + Type *DstTy = LI->getType(); // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. @@ -1811,8 +1829,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - // getNullValue also works for vectors, unlike getFalse. 
- return Constant::getNullValue(ITy); + return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); @@ -1822,8 +1839,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - // getAllOnesValue also works for vectors, unlike getTrue. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); } } if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { @@ -1840,8 +1856,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - // getAllOnesValue also works for vectors, unlike getTrue. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); @@ -1851,8 +1866,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - // getNullValue also works for vectors, unlike getFalse. - return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -1874,7 +1888,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; break; case Instruction::Shl: { - bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap(); + bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); if (!NUW && !NSW) break; @@ -1955,10 +1969,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } case CmpInst::ICMP_SGE: // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case CmpInst::ICMP_SLT: // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -2025,10 +2039,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } case CmpInst::ICMP_UGE: // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case CmpInst::ICMP_ULT: // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -2040,40 +2054,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_SGE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_SLT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && match(RHS, m_SMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). if (Pred == CmpInst::ICMP_SLE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_SGT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && match(RHS, m_UMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_UGE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_ULT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && match(RHS, m_UMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). 
if (Pred == CmpInst::ICMP_ULE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_UGT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } // If the comparison is with the result of a select instruction, check whether @@ -2219,43 +2233,71 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, const DominatorTree *) { // The type of the GEP pointer operand. - const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); // getelementptr P -> P. - if (NumOps == 1) + if (Ops.size() == 1) return Ops[0]; if (isa<UndefValue>(Ops[0])) { // Compute the (pointer) type returned by the GEP instruction. - const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1], - NumOps-1); - const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); return UndefValue::get(GEPTy); } - if (NumOps == 2) { + if (Ops.size() == 2) { // getelementptr P, 0 -> P. if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) if (C->isZero()) return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. if (TD) { - const Type *Ty = PtrTy->getElementType(); + Type *Ty = PtrTy->getElementType(); if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) return Ops[0]; } } // Check to see if this is constant foldable. - for (unsigned i = 0; i != NumOps; ++i) + for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa<Constant>(Ops[i])) return 0; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), - (Constant *const*)Ops+1, NumOps-1); + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); +} + +/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we +/// can fold the result. If not, this returns null. +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const TargetData *, + const DominatorTree *) { + if (Constant *CAgg = dyn_cast<Constant>(Agg)) + if (Constant *CVal = dyn_cast<Constant>(Val)) + return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); + + // insertvalue x, undef, n -> x + if (match(Val, m_Undef())) + return Agg; + + // insertvalue x, (extractvalue y, n), n + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val)) + if (EV->getAggregateOperand()->getType() == Agg->getType() && + EV->getIndices() == Idxs) { + // insertvalue undef, (extractvalue y, n), n -> y + if (match(Agg, m_Undef())) + return EV->getAggregateOperand(); + + // insertvalue y, (extractvalue y, n), n -> y + if (Agg == EV->getAggregateOperand()) + return Agg; + } + + return 0; } /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. 
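The new SimplifyInsertValueInst covers exactly the insertvalue/extractvalue folds spelled out in the comments above. A sketch of a caller, mirroring the SimplifyInstruction dispatch added in the next hunk (IV, TD and DT are assumed to be in scope):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Folds this can hit, per the comments above:
    //   insertvalue %x, undef, n                   -> %x
    //   insertvalue undef, (extractvalue %y, n), n -> %y
    //   insertvalue %y,    (extractvalue %y, n), n -> %y
    if (Value *V = SimplifyInsertValueInst(IV->getAggregateOperand(),
                                           IV->getInsertedValueOperand(),
                                           IV->getIndices(), TD, DT)) {
      IV->replaceAllUsesWith(V);  // hypothetical use of the simplified value
    }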
@@ -2328,7 +2370,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD); } // If the operation is associative, try some generic simplifications. @@ -2456,7 +2498,14 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT); + Result = SimplifyGEPInst(Ops, TD, DT); + break; + } + case Instruction::InsertValue: { + InsertValueInst *IV = cast<InsertValueInst>(I); + Result = SimplifyInsertValueInst(IV->getAggregateOperand(), + IV->getInsertedValueOperand(), + IV->getIndices(), TD, DT); break; } case Instruction::PHI: diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 6e27597..f80595c 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -630,7 +630,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (BB == &BB->getParent()->getEntryBlock()) { assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); if (NotNull) { - const PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } else { Result.markOverdefined(); @@ -658,7 +658,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, // If we previously determined that this is a pointer that can't be null // then return that rather than giving up entirely. if (NotNull) { - const PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } @@ -728,7 +728,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); - const IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); + IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); if (isa<BinaryOperator>(BBI)) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) { RHSRange = ConstantRange(RHS->getValue()); diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 89755da..38d677d 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -71,7 +71,7 @@ namespace { void visitCallSite(CallSite CS); void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, - const Type *Ty, unsigned Flags); + Type *Ty, unsigned Flags); void visitCallInst(CallInst &I); void visitInvokeInst(InvokeInst &I); @@ -201,7 +201,7 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: Caller and callee calling convention differ", &I); - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); Assert1(FT->isVarArg() ? @@ -240,7 +240,7 @@ void Lint::visitCallSite(CallSite CS) { // Check that an sret argument points to valid memory. 
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - const Type *Ty = + Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), TD ? TD->getABITypeAlignment(Ty) : 0, @@ -364,7 +364,7 @@ void Lint::visitReturnInst(ReturnInst &I) { // TODO: Check readnone/readonly function attributes. void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, - const Type *Ty, unsigned Flags) { + Type *Ty, unsigned Flags) { // If no memory is being referenced, it doesn't matter if the pointer // is valid. if (Size == 0) diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index c5c676b..0e6bcbf 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -63,7 +63,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, return V; SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); @@ -90,7 +90,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (TD) Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); - const Type *BaseType = 0; + Type *BaseType = 0; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as load as it is suitably aligned. @@ -114,7 +114,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, return true; // Loading directly from an alloca or global is OK. // Check if the load is within the bounds of the underlying object. - const PointerType *AddrTy = cast<PointerType>(V->getType()); + PointerType *AddrTy = cast<PointerType>(V->getType()); uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) @@ -169,7 +169,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If we're using alias analysis to disambiguate get the size of *Ptr. uint64_t AccessSize = 0; if (AA) { - const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); + Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); AccessSize = AA->getTypeStoreSize(AccessTy); } @@ -188,12 +188,16 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, --ScanFrom; // If this is a load of Ptr, the loaded value is available. + // (This is true even if the load is volatile or atomic, although + // those cases are unlikely.) if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) return LI; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If this is a store through Ptr, the value is available! + // (This is true even if the store is volatile or atomic, although + // those cases are unlikely.) 
if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) return SI->getOperand(0); diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp index c1afe8f..3997ac4 100644 --- a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp @@ -76,7 +76,13 @@ static void GetMemRefInstrs(const Loop *L, } static bool IsLoadOrStoreInst(Value *I) { - return isa<LoadInst>(I) || isa<StoreInst>(I); + // Returns true if the load or store can be analyzed. Atomic and volatile + // operations have properties which this analysis does not understand. + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->isUnordered(); + else if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->isUnordered(); + return false; } static Value *GetPointerOperand(Value *I) { diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 0583140..85aacca 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" @@ -55,12 +56,12 @@ bool Loop::isLoopInvariant(Value *V) const { } /// hasLoopInvariantOperands - Return true if all the operands of the -/// specified instruction are loop invariant. +/// specified instruction are loop invariant. bool Loop::hasLoopInvariantOperands(Instruction *I) const { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!isLoopInvariant(I->getOperand(i))) return false; - + return true; } @@ -98,6 +99,9 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return false; if (I->mayReadFromMemory()) return false; + // The landingpad instruction is immobile. + if (isa<LandingPadInst>(I)) + return false; // Determine the insertion point, unless one was given. if (!InsertPt) { BasicBlock *Preheader = getLoopPreheader(); @@ -110,7 +114,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) return false; - + // Hoist. I->moveBefore(InsertPt); Changed = true; @@ -383,6 +387,205 @@ void Loop::dump() const { } //===----------------------------------------------------------------------===// +// UnloopUpdater implementation +// + +namespace { +/// Find the new parent loop for all blocks within the "unloop" whose last +/// backedges has just been removed. +class UnloopUpdater { + Loop *Unloop; + LoopInfo *LI; + + LoopBlocksDFS DFS; + + // Map unloop's immediate subloops to their nearest reachable parents. Nested + // loops within these subloops will not change parents. However, an immediate + // subloop's new parent will be the nearest loop reachable from either its own + // exits *or* any of its nested loop's exits. + DenseMap<Loop*, Loop*> SubloopParents; + + // Flag the presence of an irreducible backedge whose destination is a block + // directly contained by the original unloop. 
+ bool FoundIB; + +public: + UnloopUpdater(Loop *UL, LoopInfo *LInfo) : + Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {} + + void updateBlockParents(); + + void removeBlocksFromAncestors(); + + void updateSubloopParents(); + +protected: + Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop); +}; +} // end anonymous namespace + +/// updateBlockParents - Update the parent loop for all blocks that are directly +/// contained within the original "unloop". +void UnloopUpdater::updateBlockParents() { + if (Unloop->getNumBlocks()) { + // Perform a post order CFG traversal of all blocks within this loop, + // propagating the nearest loop from sucessors to predecessors. + LoopBlocksTraversal Traversal(DFS, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + + if (NL != L) { + // For reducible loops, NL is now an ancestor of Unloop. + assert((NL != Unloop && (!NL || NL->contains(Unloop))) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + } + else { + // Or the current block is part of a subloop, in which case its parent + // is unchanged. + assert((FoundIB || Unloop->contains(L)) && "uninitialized successor"); + } + } + } + // Each irreducible loop within the unloop induces a round of iteration using + // the DFS result cached by Traversal. + bool Changed = FoundIB; + for (unsigned NIters = 0; Changed; ++NIters) { + assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm"); + + // Iterate over the postorder list of blocks, propagating the nearest loop + // from successors to predecessors as before. + Changed = false; + for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(), + POE = DFS.endPostorder(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + if (NL != L) { + assert(NL != Unloop && (!NL || NL->contains(Unloop)) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + Changed = true; + } + } + } +} + +/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below +/// their new parents. +void UnloopUpdater::removeBlocksFromAncestors() { + // Remove unloop's blocks from all ancestors below their new parents. + for (Loop::block_iterator BI = Unloop->block_begin(), + BE = Unloop->block_end(); BI != BE; ++BI) { + Loop *NewParent = LI->getLoopFor(*BI); + // If this block is an immediate subloop, remove all blocks (including + // nested subloops) from ancestors below the new parent loop. + // Otherwise, if this block is in a nested subloop, skip it. + if (SubloopParents.count(NewParent)) + NewParent = SubloopParents[NewParent]; + else if (Unloop->contains(NewParent)) + continue; + + // Remove blocks from former Ancestors except Unloop itself which will be + // deleted. + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent; + OldParent = OldParent->getParentLoop()) { + assert(OldParent && "new loop is not an ancestor of the original"); + OldParent->removeBlockFromLoop(*BI); + } + } +} + +/// updateSubloopParents - Update the parent loop for all subloops directly +/// nested within unloop. 
+void UnloopUpdater::updateSubloopParents() { + while (!Unloop->empty()) { + Loop *Subloop = *llvm::prior(Unloop->end()); + Unloop->removeChildLoop(llvm::prior(Unloop->end())); + + assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); + if (SubloopParents[Subloop]) + SubloopParents[Subloop]->addChildLoop(Subloop); + else + LI->addTopLevelLoop(Subloop); + } +} + +/// getNearestLoop - Return the nearest parent loop among this block's +/// successors. If a successor is a subloop header, consider its parent to be +/// the nearest parent of the subloop's exits. +/// +/// For subloop blocks, simply update SubloopParents and return NULL. +Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { + + // Initially for blocks directly contained by Unloop, NearLoop == Unloop and + // is considered uninitialized. + Loop *NearLoop = BBLoop; + + Loop *Subloop = 0; + if (NearLoop != Unloop && Unloop->contains(NearLoop)) { + Subloop = NearLoop; + // Find the subloop ancestor that is directly contained within Unloop. + while (Subloop->getParentLoop() != Unloop) { + Subloop = Subloop->getParentLoop(); + assert(Subloop && "subloop is not an ancestor of the original loop"); + } + // Get the current nearest parent of the Subloop exits, initially Unloop. + if (!SubloopParents.count(Subloop)) + SubloopParents[Subloop] = Unloop; + NearLoop = SubloopParents[Subloop]; + } + + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) { + assert(!Subloop && "subloop blocks must have a successor"); + NearLoop = 0; // unloop blocks may now exit the function. + } + for (; I != E; ++I) { + if (*I == BB) + continue; // self loops are uninteresting + + Loop *L = LI->getLoopFor(*I); + if (L == Unloop) { + // This successor has not been processed. This path must lead to an + // irreducible backedge. + assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB"); + FoundIB = true; + } + if (L != Unloop && Unloop->contains(L)) { + // Successor is in a subloop. + if (Subloop) + continue; // Branching within subloops. Ignore it. + + // BB branches from the original into a subloop header. + assert(L->getParentLoop() == Unloop && "cannot skip into nested loops"); + + // Get the current nearest parent of the Subloop's exits. + L = SubloopParents[L]; + // L could be Unloop if the only exit was an irreducible backedge. + } + if (L == Unloop) { + continue; + } + // Handle critical edges from Unloop into a sibling loop. + if (L && !L->contains(Unloop)) { + L = L->getParentLoop(); + } + // Remember the nearest parent loop among successors or subloop exits. + if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L)) + NearLoop = L; + } + if (Subloop) { + SubloopParents[Subloop] = NearLoop; + return BBLoop; + } + return NearLoop; +} + +//===----------------------------------------------------------------------===// // LoopInfo implementation // bool LoopInfo::runOnFunction(Function &) { @@ -391,6 +594,68 @@ bool LoopInfo::runOnFunction(Function &) { return false; } +/// updateUnloop - The last backedge has been removed from a loop--now the +/// "unloop". Find a new parent for the blocks contained within unloop and +/// update the loop tree. We don't necessarily have valid dominators at this +/// point, but LoopInfo is still valid except for the removal of this loop. +/// +/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without +/// checking first is illegal. 
+void LoopInfo::updateUnloop(Loop *Unloop) { + + // First handle the special case of no parent loop to simplify the algorithm. + if (!Unloop->getParentLoop()) { + // Since BBLoop had no parent, Unloop blocks are no longer in a loop. + for (Loop::block_iterator I = Unloop->block_begin(), + E = Unloop->block_end(); I != E; ++I) { + + // Don't reparent blocks in subloops. + if (getLoopFor(*I) != Unloop) + continue; + + // Blocks no longer have a parent but are still referenced by Unloop until + // the Unloop object is deleted. + LI.changeLoopFor(*I, 0); + } + + // Remove the loop from the top-level LoopInfo object. + for (LoopInfo::iterator I = LI.begin();; ++I) { + assert(I != LI.end() && "Couldn't find loop"); + if (*I == Unloop) { + LI.removeLoop(I); + break; + } + } + + // Move all of the subloops to the top-level. + while (!Unloop->empty()) + LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end()))); + + return; + } + + // Update the parent loop for all blocks within the loop. Blocks within + // subloops will not change parents. + UnloopUpdater Updater(Unloop, this); + Updater.updateBlockParents(); + + // Remove blocks from former ancestor loops. + Updater.removeBlocksFromAncestors(); + + // Add direct subloops as children in their new parent loop. + Updater.updateSubloopParents(); + + // Remove unloop from its parent loop. + Loop *ParentLoop = Unloop->getParentLoop(); + for (Loop::iterator I = ParentLoop->begin();; ++I) { + assert(I != ParentLoop->end() && "Couldn't find loop"); + if (*I == Unloop) { + ParentLoop->removeChildLoop(I); + break; + } + } +} + void LoopInfo::verifyAnalysis() const { // LoopInfo is a FunctionPass, but verifying every loop in the function // each time verifyAnalysis is called is very expensive. The @@ -400,12 +665,21 @@ void LoopInfo::verifyAnalysis() const { if (!VerifyLoopInfo) return; + DenseSet<const Loop*> Loops; for (iterator I = begin(), E = end(); I != E; ++I) { assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); - (*I)->verifyLoopNest(); + (*I)->verifyLoopNest(&Loops); } - // TODO: check BBMap consistency. + // Verify that blocks are mapped to valid loops. + // + // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we + // could also verify that the blocks are still in the correct loops. + for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), + E = LI.BBMap.end(); I != E; ++I) { + assert(Loops.count(I->second) && "orphaned loop"); + assert(I->second->contains(I->first) && "orphaned block"); + } } void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { @@ -417,3 +691,15 @@ void LoopInfo::print(raw_ostream &OS, const Module*) const { LI.print(OS); } +//===----------------------------------------------------------------------===// +// LoopBlocksDFS implementation +// + +/// Traverse the loop blocks and store the DFS result. +/// Useful for clients that just want the final DFS result and don't need to +/// visit blocks during the initial traversal. 
+void LoopBlocksDFS::perform(LoopInfo *LI) { + LoopBlocksTraversal Traversal(*this, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) ; +} diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 10e3f29..5ba1f40 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -59,9 +59,9 @@ char PrintLoopPass::ID = 0; static DebugInfoProbeInfo *TheDebugProbe; static void createDebugInfoProbe() { if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, // before static globals, thus it will be destroyed before them. static ManagedStatic<DebugInfoProbeInfo> DIP; TheDebugProbe = &*DIP; @@ -73,73 +73,29 @@ static void createDebugInfoProbe() { char LPPassManager::ID = 0; -LPPassManager::LPPassManager(int Depth) - : FunctionPass(ID), PMDataManager(Depth) { +LPPassManager::LPPassManager() + : FunctionPass(ID), PMDataManager() { skipThisLoop = false; redoThisLoop = false; LI = NULL; CurrentLoop = NULL; } -/// Delete loop from the loop queue and loop hierarchy (LoopInfo). +/// Delete loop from the loop queue and loop hierarchy (LoopInfo). void LPPassManager::deleteLoopFromQueue(Loop *L) { - if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop. - // Reparent all of the blocks in this loop. Since BBLoop had a parent, - // they are now all in it. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops. - LI->changeLoopFor(*I, ParentLoop); - - // Remove the loop from its parent loop. - for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();; - ++I) { - assert(I != E && "Couldn't find loop"); - if (*I == L) { - ParentLoop->removeChildLoop(I); - break; - } - } - - // Move all subloops into the parent loop. - while (!L->empty()) - ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1)); - } else { - // Reparent all of the blocks in this loop. Since BBLoop had no parent, - // they no longer in a loop at all. - - for (unsigned i = 0; i != L->getBlocks().size(); ++i) { - // Don't change blocks in subloops. - if (LI->getLoopFor(L->getBlocks()[i]) == L) { - LI->removeBlock(L->getBlocks()[i]); - --i; - } - } - - // Remove the loop from the top-level LoopInfo object. - for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) { - assert(I != E && "Couldn't find loop"); - if (*I == L) { - LI->removeLoop(I); - break; - } - } - - // Move all of the subloops to the top-level. - while (!L->empty()) - LI->addTopLevelLoop(L->removeChildLoop(L->end()-1)); - } - - delete L; + LI->updateUnloop(L); // If L is current loop then skip rest of the passes and let // runOnFunction remove L from LQ. Otherwise, remove L from LQ now // and continue applying other passes on CurrentLoop. 
- if (CurrentLoop == L) { + if (CurrentLoop == L) skipThisLoop = true; + + delete L; + + if (skipThisLoop) return; - } for (std::deque<Loop *>::iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { @@ -166,10 +122,10 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { void LPPassManager::insertLoopIntoQueue(Loop *L) { // Insert L into loop queue - if (L == CurrentLoop) + if (L == CurrentLoop) redoLoop(L); else if (!L->getParentLoop()) - // This is top level loop. + // This is top level loop. LQ.push_front(L); else { // Insert L after the parent loop. @@ -195,9 +151,9 @@ void LPPassManager::redoLoop(Loop *L) { /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for /// all loop passes. -void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, +void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *LP = getContainedPass(Index); LP->cloneBasicBlockAnalysis(From, To, L); } @@ -206,13 +162,13 @@ void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, /// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes. void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { Instruction &I = *BI; deleteSimpleAnalysisValue(&I, L); } } - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *LP = getContainedPass(Index); LP->deleteAnalysisValue(V, L); } @@ -228,7 +184,7 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { /// Pass Manager itself does not invalidate any analysis info. void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { - // LPPassManager needs LoopInfo. In the long term LoopInfo class will + // LPPassManager needs LoopInfo. In the long term LoopInfo class will // become part of LPPassManager. Info.addRequired<LoopInfo>(); Info.setPreservesAll(); @@ -255,7 +211,7 @@ bool LPPassManager::runOnFunction(Function &F) { for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { Loop *L = *I; - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); Changed |= P->doInitialization(L, *this); } @@ -263,13 +219,13 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - + CurrentLoop = LQ.back(); skipThisLoop = false; redoThisLoop = false; // Run all passes on the current Loop. - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); @@ -319,23 +275,23 @@ bool LPPassManager::runOnFunction(Function &F) { // Do not run other passes on this loop. break; } - + // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. 
if (skipThisLoop) - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); } // Pop the loop from queue after running all passes. LQ.pop_back(); - + if (redoThisLoop) LQ.push_back(CurrentLoop); } - + // Finalization for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); @@ -372,7 +328,7 @@ Pass *LoopPass::createPrinterPass(raw_ostream &O, // LPPassManger as expected. void LoopPass::preparePassManager(PMStack &PMS) { - // Find LPPassManager + // Find LPPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_LoopPassManager) PMS.pop(); @@ -381,14 +337,14 @@ void LoopPass::preparePassManager(PMStack &PMS) { // by other passes that are managed by LPM then do not insert // this pass in current LPM. Use new LPPassManager. if (PMS.top()->getPassManagerType() == PMT_LoopPassManager && - !PMS.top()->preserveHigherLevelAnalysis(this)) + !PMS.top()->preserveHigherLevelAnalysis(this)) PMS.pop(); } /// Assign pass manager to manage this pass. void LoopPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { - // Find LPPassManager + // Find LPPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_LoopPassManager) PMS.pop(); @@ -397,12 +353,12 @@ void LoopPass::assignPassManager(PMStack &PMS, if (PMS.top()->getPassManagerType() == PMT_LoopPassManager) LPPM = (LPPassManager*)PMS.top(); else { - // Create new Loop Pass Manager if it does not exist. + // Create new Loop Pass Manager if it does not exist. assert (!PMS.empty() && "Unable to create Loop Pass Manager"); PMDataManager *PMD = PMS.top(); - // [1] Create new Call Graph Pass Manager - LPPM = new LPPassManager(PMD->getDepth() + 1); + // [1] Create new Loop Pass Manager + LPPM = new LPPassManager(); LPPM->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index 2283db0..fde07ea 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -25,8 +25,17 @@ namespace { struct MemDepPrinter : public FunctionPass { const Function *F; - typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag; - typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep; + enum DepType { + Clobber = 0, + Def, + NonFuncLocal, + Unknown + }; + + static const char* DepTypeStr[]; + + typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair; + typedef std::pair<InstTypePair, const BasicBlock *> Dep; typedef SmallSetVector<Dep, 4> DepSet; typedef DenseMap<const Instruction *, DepSet> DepSetMap; DepSetMap Deps; @@ -50,6 +59,21 @@ namespace { Deps.clear(); F = 0; } + + private: + static InstTypePair getInstTypePair(MemDepResult dep) { + if (dep.isClobber()) + return InstTypePair(dep.getInst(), Clobber); + if (dep.isDef()) + return InstTypePair(dep.getInst(), Def); + if (dep.isNonFuncLocal()) + return InstTypePair(dep.getInst(), NonFuncLocal); + assert(dep.isUnknown() && "unexptected dependence type"); + return InstTypePair(dep.getInst(), Unknown); + } + static InstTypePair getInstTypePair(const Instruction* inst, DepType type) { + return InstTypePair(inst, type); + } }; } @@ -64,6 +88,9 @@ FunctionPass *llvm::createMemDepPrinter() { return new MemDepPrinter(); } +const char* MemDepPrinter::DepTypeStr[] + = {"Clobber", "Def", 
"NonFuncLocal", "Unknown"}; + bool MemDepPrinter::runOnFunction(Function &F) { this->F = &F; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); @@ -79,10 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { - assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && - "Local dep should be unknown, def or clobber!"); - Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), + Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(0))); } else if (CallSite CS = cast<Value>(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = @@ -92,22 +116,26 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && - "Resolved non-local call dep should be unknown, def or " - "clobber!"); - InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), - I->getBB())); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); } } else { SmallVector<NonLocalDepResult, 4> NLDI; if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - // FIXME: Volatile is not handled properly here. + if (!LI->isUnordered()) { + // FIXME: Handle atomic/volatile loads. + Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } AliasAnalysis::Location Loc = AA.getLocation(LI); - MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(), - LI->getParent(), NLDI); + MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // FIXME: Volatile is not handled properly here. + if (!LI->isUnordered()) { + // FIXME: Handle atomic/volatile stores. 
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } AliasAnalysis::Location Loc = AA.getLocation(SI); MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { @@ -121,11 +149,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (SmallVectorImpl<NonLocalDepResult>::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert(Res.isClobber() != Res.isDef() && - "Resolved non-local pointer dep should be def or clobber!"); - InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), - I->getBB())); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); } } } @@ -146,26 +170,18 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); I != E; ++I) { const Instruction *DepInst = I->first.getPointer(); - bool isClobber = I->first.getInt(); + DepType type = I->first.getInt(); const BasicBlock *DepBB = I->second; OS << " "; - if (!DepInst) - OS << "Unknown"; - else if (isClobber) - OS << "Clobber"; - else - OS << " Def"; + OS << DepTypeStr[type]; if (DepBB) { OS << " in block "; WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); } if (DepInst) { OS << " from: "; - if (DepInst == Inst) - OS << "<unspecified>"; - else - DepInst->print(OS); + DepInst->print(OS); } OS << "\n"; } diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 53d4304..8d451c4 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -47,7 +47,7 @@ static bool isMallocCall(const CallInst *CI) { // Check malloc prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. - const FunctionType *FTy = Callee->getFunctionType(); + FunctionType *FTy = Callee->getFunctionType(); if (FTy->getNumParams() != 1) return false; return FTy->getParamType(0)->isIntegerTy(32) || @@ -94,12 +94,12 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, return NULL; // The size of the malloc's result type must be known to determine array size. - const Type *T = getMallocAllocatedType(CI); + Type *T = getMallocAllocatedType(CI); if (!T || !T->isSized() || !TD) return NULL; unsigned ElementSize = TD->getTypeAllocSize(T); - if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); // If malloc call's arg can be determined to be a multiple of ElementSize, @@ -133,10 +133,10 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 0: PointerType is the calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -const PointerType *llvm::getMallocType(const CallInst *CI) { +PointerType *llvm::getMallocType(const CallInst *CI) { assert(isMalloc(CI) && "getMallocType and not malloc call"); - const PointerType *MallocType = NULL; + PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -164,8 +164,8 @@ const PointerType *llvm::getMallocType(const CallInst *CI) { /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. 
-const Type *llvm::getMallocAllocatedType(const CallInst *CI) { - const PointerType *PT = getMallocType(CI); +Type *llvm::getMallocAllocatedType(const CallInst *CI) { + PointerType *PT = getMallocType(CI); return PT ? PT->getElementType() : NULL; } @@ -201,7 +201,7 @@ const CallInst *llvm::isFreeCall(const Value *I) { // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. - const FunctionType *FTy = Callee->getFunctionType(); + FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) return 0; if (FTy->getNumParams() != 1) diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index bba4482..92967c0 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -120,21 +120,27 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, AliasAnalysis::Location &Loc, AliasAnalysis *AA) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - if (LI->isVolatile()) { - Loc = AliasAnalysis::Location(); + if (LI->isUnordered()) { + Loc = AA->getLocation(LI); + return AliasAnalysis::Ref; + } else if (LI->getOrdering() == Monotonic) { + Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } - Loc = AA->getLocation(LI); - return AliasAnalysis::Ref; + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (SI->isVolatile()) { - Loc = AliasAnalysis::Location(); + if (SI->isUnordered()) { + Loc = AA->getLocation(SI); + return AliasAnalysis::Mod; + } else if (SI->getOrdering() == Monotonic) { + Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } - Loc = AA->getLocation(SI); - return AliasAnalysis::Mod; + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { @@ -232,7 +238,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getUnknown(); + return MemDepResult::getNonFuncLocal(); } /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that @@ -270,8 +276,8 @@ unsigned MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI, const TargetData &TD) { - // We can only extend non-volatile integer loads. - if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + // We can only extend simple integer loads. + if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; // Get the base of this load. int64_t LIOffs = 0; @@ -369,6 +375,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Atomic loads have complications involved. + // FIXME: This is overly conservative. + if (!LI->isUnordered()) + return MemDepResult::getClobber(LI); + AliasAnalysis::Location LoadLoc = AA->getLocation(LI); // If we found a pointer, check if it could be the same as our pointer. @@ -382,7 +393,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // location is 1 byte at P+1). 
If so, return it as a load/load // clobber result, allowing the client to decide to widen the load if // it wants to. - if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) @@ -424,6 +435,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Atomic stores have complications involved. + // FIXME: This is overly conservative. + if (!SI->isUnordered()) + return MemDepResult::getClobber(SI); + // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -483,7 +499,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getUnknown(); + return MemDepResult::getNonFuncLocal(); } /// getDependency - Return the instruction on which a memory operation @@ -516,7 +532,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { if (QueryParent != &QueryParent->getParent()->getEntryBlock()) LocalCache = MemDepResult::getNonLocal(); else - LocalCache = MemDepResult::getUnknown(); + LocalCache = MemDepResult::getNonFuncLocal(); } else { AliasAnalysis::Location MemLoc; AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); @@ -672,7 +688,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // a clobber, otherwise it is unknown. Dep = MemDepResult::getNonLocal(); } else { - Dep = MemDepResult::getUnknown(); + Dep = MemDepResult::getNonFuncLocal(); } // If we had a dirty entry for the block, update it. Otherwise, just add @@ -790,7 +806,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (Dep.isNonLocal() || Dep.isUnknown()) + if (!Dep.isDef() && !Dep.isClobber()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index 70dcd0d..7e22ddc 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. 
- if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -407,9 +407,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, } GetElementPtrInst *Result = - GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); + GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); Result->setIsInBounds(GEP->isInBounds()); NewInsts.push_back(Result); return Result; diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp index 7c584da..0e3b6e6 100644 --- a/contrib/llvm/lib/Analysis/PathNumbering.cpp +++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp @@ -387,7 +387,7 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) - || isa<UnwindInst>(terminator)) + || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator)) addEdge(currentNode, getExit(),0); currentNode->setColor(BallLarusNode::GRAY); diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp index 80eda79..3a3529b 100644 --- a/contrib/llvm/lib/Analysis/RegionPass.cpp +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -27,8 +27,8 @@ using namespace llvm; char RGPassManager::ID = 0; -RGPassManager::RGPassManager(int Depth) - : FunctionPass(ID), PMDataManager(Depth) { +RGPassManager::RGPassManager() + : FunctionPass(ID), PMDataManager() { skipThisRegion = false; redoThisRegion = false; RI = NULL; @@ -250,7 +250,7 @@ void RegionPass::assignPassManager(PMStack &PMS, PMDataManager *PMD = PMS.top(); // [1] Create new Region Pass Manager - RGPM = new RGPassManager(PMD->getDepth() + 1); + RGPM = new RGPassManager(); RGPM->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 025718e..e0ac56c 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -197,7 +197,7 @@ void SCEV::print(raw_ostream &OS) const { } case scUnknown: { const SCEVUnknown *U = cast<SCEVUnknown>(this); - const Type *AllocTy; + Type *AllocTy; if (U->isSizeOf(AllocTy)) { OS << "sizeof(" << *AllocTy << ")"; return; @@ -207,7 +207,7 @@ void SCEV::print(raw_ostream &OS) const { return; } - const Type *CTy; + Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; @@ -228,7 +228,7 @@ void SCEV::print(raw_ostream &OS) const { llvm_unreachable("Unknown SCEV kind!"); } -const Type *SCEV::getType() const { +Type *SCEV::getType() const { switch (getSCEVType()) { case scConstant: return cast<SCEVConstant>(this)->getType(); @@ -297,17 +297,17 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) { } const SCEV * -ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); +ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { + IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, V, isSigned)); } SCEVCastExpr::SCEVCastExpr(const 
FoldingSetNodeIDRef ID, - unsigned SCEVTy, const SCEV *op, const Type *ty) + unsigned SCEVTy, const SCEV *op, Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -315,7 +315,7 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -323,7 +323,7 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -354,7 +354,7 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) { setValPtr(New); } -bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { +bool SCEVUnknown::isSizeOf(Type *&AllocTy) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) @@ -371,15 +371,15 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { return false; } -bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { +bool SCEVUnknown::isAlignOf(Type *&AllocTy) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) if (CE->getOpcode() == Instruction::GetElementPtr && CE->getOperand(0)->isNullValue()) { - const Type *Ty = + Type *Ty = cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked() && CE->getNumOperands() == 3 && CE->getOperand(1)->isNullValue()) { @@ -396,7 +396,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { return false; } -bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { +bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) @@ -404,7 +404,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { CE->getNumOperands() == 3 && CE->getOperand(0)->isNullValue() && CE->getOperand(1)->isNullValue()) { - const Type *Ty = + Type *Ty = cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); // Ignore vector types here so that ScalarEvolutionExpander doesn't // emit getelementptrs that index into vectors. @@ -652,7 +652,7 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, /// Assume, K > 0. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, - const Type* ResultTy) { + Type *ResultTy) { // Handle the simplest case efficiently. 
if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); @@ -742,7 +742,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, MultiplyFactor = MultiplyFactor.trunc(W); // Calculate the product, at width T+W - const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + IntegerType *CalculationTy = IntegerType::get(SE.getContext(), CalculationBits); const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { @@ -790,7 +790,7 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, //===----------------------------------------------------------------------===// const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && @@ -877,7 +877,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -954,7 +954,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, ZMul); @@ -1062,7 +1062,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, // result, the expression "Step + sext(PreIncAR)" is congruent with // "sext(PostIncAR)" static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, - const Type *Ty, + Type *Ty, ScalarEvolution *SE) { const Loop *L = AR->getLoop(); const SCEV *Start = AR->getStart(); @@ -1070,14 +1070,26 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); - if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step) + if (!SA) + return 0; + + // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV + // subtraction is expensive. For this purpose, perform a quick and dirty + // difference, by checking for Step in the operand list. + SmallVector<const SCEV *, 4> DiffOps; + for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end(); + I != E; ++I) { + if (*I != Step) + DiffOps.push_back(*I); + } + if (DiffOps.size() == SA->getNumOperands()) return 0; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. // 1. NSW flags on the step increment. - const SCEV *PreStart = SA->getOperand(1); + const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1086,7 +1098,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // 2. Direct overflow check on the step operation's expression. 
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); - const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), SE->getSignExtendExpr(Step, WideTy)); @@ -1112,7 +1124,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Get the normalized sign-extended expression for this AddRec's Start. static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, - const Type *Ty, + Type *Ty, ScalarEvolution *SE) { const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); if (!PreStart) @@ -1123,7 +1135,7 @@ static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, } const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1208,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, SMul); @@ -1275,7 +1287,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, /// unspecified bits out to the given type. /// const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1438,7 +1450,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); @@ -1488,7 +1500,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. - const Type *Ty = Ops[0]->getType(); + Type *Ty = Ops[0]->getType(); bool FoundMatch = false; for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 @@ -1515,8 +1527,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // if the contents of the resulting outer trunc fold to something simple. 
for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); - const Type *DstType = Trunc->getType(); - const Type *SrcType = Trunc->getOperand()->getType(); + Type *DstType = Trunc->getType(); + Type *SrcType = Trunc->getOperand()->getType(); SmallVector<const SCEV *, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the @@ -1735,7 +1747,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; - // Otherwise, add the folded AddRec by the non-liv parts. + // Otherwise, add the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; @@ -1800,6 +1812,38 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, return S; } +static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { + uint64_t k = i*j; + if (j > 1 && k / j != i) Overflow = true; + return k; +} + +/// Compute the result of "n choose k", the binomial coefficient. If an +/// intermediate computation overflows, Overflow will be set and the return will +/// be garbage. Overflow is not cleared on absense of overflow. +static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { + // We use the multiplicative formula: + // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . + // At each iteration, we take the n-th term of the numeral and divide by the + // (k-n)th term of the denominator. This division will always produce an + // integral result, and helps reduce the chance of overflow in the + // intermediate computations. However, we can still overflow even when the + // final result would fit. + + if (n == 0 || n == k) return 1; + if (k > n) return 0; + + if (k > n/2) + k = n-k; + + uint64_t r = 1; + for (uint64_t i = 1; i <= k; ++i) { + r = umul_ov(r, n-(i-1), Overflow); + r /= i; + } + return r; +} + /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, @@ -1809,7 +1853,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!"); @@ -1960,7 +2004,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; - // Otherwise, multiply the folded AddRec by the non-liv parts. + // Otherwise, multiply the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; @@ -1974,31 +2018,65 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // multiplied together. If so, we can fold them. 
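
The umul_ov/Choose helpers added above compute "n choose k" with the multiplicative formula, dividing by i at every step so the intermediate values stay close to the final result, and latching the Overflow flag as soon as any product wraps. A small standalone sketch of the same technique (an illustrative copy, not the LLVM sources):

    // Multiplicative binomial coefficient with a sticky overflow flag,
    // mirroring the shape of the Choose() hunk above (illustration only).
    #include <cassert>
    #include <cstdint>

    static uint64_t umul_ov(uint64_t a, uint64_t b, bool &Overflow) {
      uint64_t p = a * b;
      if (b > 1 && p / b != a) Overflow = true;   // set once, never cleared
      return p;
    }

    static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
      if (n == 0 || n == k) return 1;
      if (k > n) return 0;
      if (k > n / 2) k = n - k;                   // C(n,k) == C(n,n-k)
      uint64_t r = 1;
      for (uint64_t i = 1; i <= k; ++i) {
        r = umul_ov(r, n - (i - 1), Overflow);    // next numerator term
        r /= i;                                   // always divides exactly
      }
      return r;
    }

    int main() {
      bool Ov = false;
      uint64_t a = Choose(4, 2, Ov);              // (1*4)/1 = 4, then (4*3)/2 = 6
      uint64_t b = Choose(6, 3, Ov);              // r goes 6, 15, 20
      assert(a == 6 && b == 20 && !Ov);
      (void)Choose(1ULL << 40, 3, Ov);            // 2^40 * (2^40 - 1) wraps
      assert(Ov);                                 // caller must discard the result
      return 0;
    }

The exact division at each step is what keeps the intermediates small. It also gives a quick sanity check for the add-rec product folding just below: {0,+,1}<L> multiplied by itself comes out as {0,+,1,+,2}<L>, since evaluating that chrec at iteration n gives 1*C(n,1) + 2*C(n,2) = n*n.
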
for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); - ++OtherIdx) + ++OtherIdx) { if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { - // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> --> - // {A*C,+,F*D + G*B + B*D}<L> + // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> + // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ + // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z + // ]]],+,...up to x=2n}. + // Note that the arguments to choose() are always integers with values + // known at compile time, never SCEV objects. + // + // The implementation avoids pointless extra computations when the two + // addrec's are of different length (mathematically, it's equivalent to + // an infinite stream of zeros on the right). + bool OpsModified = false; for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); ++OtherIdx) if (const SCEVAddRecExpr *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) if (OtherAddRec->getLoop() == AddRecLoop) { - const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); - const SCEV *B = F->getStepRecurrence(*this); - const SCEV *D = G->getStepRecurrence(*this); - const SCEV *NewStep = getAddExpr(getMulExpr(F, D), - getMulExpr(G, B), - getMulExpr(B, D)); - const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, - F->getLoop(), - SCEV::FlagAnyWrap); - if (Ops.size() == 2) return NewAddRec; - Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector<const SCEV*, 7> AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; + x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); + } + } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, + AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + } } - return getMulExpr(Ops); + if (OpsModified) + return getMulExpr(Ops); } + } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2042,21 +2120,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // Determine if the division can be folded into the operands of // its operands. // TODO: Generalize this to non-constants by using known-bits information. - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. 
if (!RHSC->getValue()->getValue().isPowerOf2()) ++MaxShiftAmt; - const IntegerType *ExtTy = + IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); - // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) if (const SCEVConstant *Step = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) - if (!Step->getValue()->getValue() - .urem(RHSC->getValue()->getValue()) && + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) { + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + const APInt &StepInt = Step->getValue()->getValue(); + const APInt &DivInt = RHSC->getValue()->getValue(); + if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), @@ -2067,6 +2146,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } + /// Get a canonical UDivExpr for a recurrence. + /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. + // We can currently only fold X%N if X is constant. + const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); + if (StartC && !DivInt.urem(StepInt) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop(), SCEV::FlagAnyWrap)) { + const APInt &StartInt = StartC->getValue()->getValue(); + const APInt &StartRem = StartInt.urem(StepInt); + if (StartRem != 0) + LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, + AR->getLoop(), SCEV::FlagNW); + } + } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; @@ -2151,7 +2246,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, const Loop *L, SCEV::NoWrapFlags Flags) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); for (unsigned i = 1, e = Operands.size(); i != e; ++i) assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!"); @@ -2269,7 +2364,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!"); @@ -2373,7 +2468,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!"); @@ -2476,7 +2571,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { // If we have TargetData, we can bypass creating a 
target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. @@ -2488,20 +2583,20 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { +const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, unsigned FieldNo) { // If we have TargetData, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. @@ -2514,17 +2609,17 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy, +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2558,14 +2653,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { /// the SCEV framework. This primarily includes integer types, and it /// can optionally include pointer types if the ScalarEvolution class /// has access to target-specific information. -bool ScalarEvolution::isSCEVable(const Type *Ty) const { +bool ScalarEvolution::isSCEVable(Type *Ty) const { // Integers and pointers are always SCEVable. return Ty->isIntegerTy() || Ty->isPointerTy(); } /// getTypeSizeInBits - Return the size in bits of the specified type, /// for which isSCEVable must return true. -uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { +uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); // If we have a TargetData, use it! @@ -2586,7 +2681,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { /// the given type and which represents how SCEV will treat the given /// type, for which isSCEVable must return true. For pointer types, /// this is the pointer-sized integer type. 
-const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { +Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); if (Ty->isIntegerTy()) @@ -2628,7 +2723,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { return getConstant( cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMulExpr(V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); @@ -2640,7 +2735,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getConstant( cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); @@ -2664,8 +2759,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, /// input value to the specified type. If the type must be extended, it is zero /// extended. const SCEV * -ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); @@ -2681,8 +2776,8 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { /// extended. const SCEV * ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, - const Type *Ty) { - const Type *SrcTy = V->getType(); + Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); @@ -2697,8 +2792,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, /// input value to the specified type. If the type must be extended, it is zero /// extended. The conversion must not be narrowing. const SCEV * -ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!"); @@ -2713,8 +2808,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { /// input value to the specified type. If the type must be extended, it is sign /// extended. The conversion must not be narrowing. const SCEV * -ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!"); @@ -2730,8 +2825,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { /// it is extended with unspecified bits. The conversion must not be /// narrowing. 
const SCEV * -ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!"); @@ -2745,8 +2840,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be widening. const SCEV * -ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!"); @@ -3032,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // context. bool isInBounds = GEP->isInBounds(); - const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) @@ -3044,7 +3139,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { I != E; ++I) { Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. - if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); @@ -3244,7 +3339,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { // TODO: non-affine addrec if (AddRec->isAffine()) { - const Type *Ty = AddRec->getType(); + Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { @@ -3396,7 +3491,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { // TODO: non-affine addrec if (AddRec->isAffine()) { - const Type *Ty = AddRec->getType(); + Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { @@ -3503,7 +3598,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { AddOps.push_back(Op1); } AddOps.push_back(getSCEV(U->getOperand(0))); - return getAddExpr(AddOps); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V); + if (OBO->hasNoSignedWrap()) + setFlags(Flags, SCEV::FlagNSW); + if (OBO->hasNoUnsignedWrap()) + setFlags(Flags, SCEV::FlagNUW); + return getAddExpr(AddOps, Flags); } case Instruction::Mul: { // See the Add code above. 
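
The Add case above now forwards the instruction's nsw/nuw bits into getAddExpr. One reason those flags are worth carrying is that a no-signed-wrap increment is one of the facts that later lets a sign extension be folded into a recurrence; without it, widening and adding do not commute. A pure-C++ illustration of that arithmetic fact (a minimal sketch, independent of the SCEV code above):

    // Without a no-signed-wrap guarantee, sext(x + 1) need not equal
    // sext(x) + 1: at the 8-bit maximum the narrow add wraps around.
    #include <cassert>
    #include <cstdint>
    int main() {
      int8_t x = 127;
      int8_t narrowAdd = (int8_t)(x + 1);      // wraps to -128
      int32_t wideAdd  = (int32_t)x + 1;       // 128
      assert((int32_t)narrowAdd != wideAdd);   // folding the extension would be wrong
      return 0;
    }
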
@@ -3601,9 +3702,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { - const Type *UTy = U->getType(); + Type *UTy = U->getType(); const SCEV *Z0 = Z->getOperand(); - const Type *Z0Ty = Z0->getType(); + Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); // If C is a low-bits mask, the zero extend is serving to @@ -3813,6 +3914,70 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // +/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a +/// normal unsigned value, if possible. Returns 0 if the trip count is unknown +/// or not constant. Will also return 0 if the maximum trip count is very large +/// (>= 2^32) +unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, + BasicBlock *ExitBlock) { + const SCEVConstant *ExitCount = + dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock)); + if (!ExitCount) + return 0; + + ConstantInt *ExitConst = ExitCount->getValue(); + + // Guard against huge trip counts. + if (ExitConst->getValue().getActiveBits() > 32) + return 0; + + // In case of integer overflow, this returns 0, which is correct. + return ((unsigned)ExitConst->getZExtValue()) + 1; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). +/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant, use getSmallConstantTripCount for that case), Will also return 1 +/// if the trip count is very large (>= 2^32). +unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, + BasicBlock *ExitBlock) { + const SCEV *ExitCount = getExitCount(L, ExitBlock); + if (ExitCount == getCouldNotCompute()) + return 1; + + // Get the trip count from the BE count by adding 1. + const SCEV *TCMul = getAddExpr(ExitCount, + getConstant(ExitCount->getType(), 1)); + // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt + // to factor simple cases. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) + TCMul = Mul->getOperand(0); + + const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul); + if (!MulC) + return 1; + + ConstantInt *Result = MulC->getValue(); + + // Guard against huge trip counts. + if (!Result || Result->getValue().getActiveBits() > 32) + return 1; + + return (unsigned)Result->getZExtValue(); +} + +// getExitCount - Get the expression for the number of loop iterations for which +// this loop is guaranteed not to exit via ExitintBlock. Otherwise return +// SCEVCouldNotCompute. +const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { + return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); +} + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header @@ -3825,14 +3990,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// hasLoopInvariantBackedgeTakenCount). 
/// const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).Exact; + return getBackedgeTakenInfo(L).getExact(this); } /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except /// return the least SCEV value that is known never to be less than the /// actual backedge taken count. const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).Max; + return getBackedgeTakenInfo(L).getMax(this); } /// PushLoopPHIs - Push PHI nodes in the header of the given loop @@ -3849,33 +4014,31 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { - // Initially insert a CouldNotCompute for this loop. If the insertion + // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = - BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); + BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo())); if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo Result = getCouldNotCompute(); - BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); - if (Computed.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(Computed.Exact, L) && - isLoopInvariant(Computed.Max, L) && + // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result + // must be cleared in this scope. + BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + + if (Result.getExact(this) != getCouldNotCompute()) { + assert(isLoopInvariant(Result.getExact(this), L) && + isLoopInvariant(Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; - - // Update the value in the map. - Result = Computed; - } else { - if (Computed.Max != getCouldNotCompute()) - // Update the value in the map. - Result = Computed; - if (isa<PHINode>(L->getHeader()->begin())) - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; + } + else if (Result.getMax(this) == getCouldNotCompute() && + isa<PHINode>(L->getHeader()->begin())) { + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any @@ -3883,7 +4046,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (Computed.hasAnyInfo()) { + if (Result.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -3928,7 +4091,12 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { /// compute a trip count, or if the loop is deleted. void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. 
- BackedgeTakenCounts.erase(L); + DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos = + BackedgeTakenCounts.find(L); + if (BTCPos != BackedgeTakenCounts.end()) { + BTCPos->second.clear(); + BackedgeTakenCounts.erase(BTCPos); + } // Drop information about expressions based on loop-header PHIs. SmallVector<Instruction *, 16> Worklist; @@ -3984,6 +4152,85 @@ void ScalarEvolution::forgetValue(Value *V) { } } +/// getExact - Get the exact loop backedge taken count considering all loop +/// exits. If all exits are computable, this is the minimum computed count. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { + // If any exits were not computable, the loop is not computable. + if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); + + // We need at least one computable exit. + if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); + assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); + + const SCEV *BECount = 0; + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + + if (!BECount) + BECount = ENT->ExactNotTaken; + else + BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + } + assert(BECount && "Invalid not taken count for loop exit"); + return BECount; +} + +/// getExact - Get the exact not taken count for this loop exit. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, + ScalarEvolution *SE) const { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExitingBlock == ExitingBlock) + return ENT->ExactNotTaken; + } + return SE->getCouldNotCompute(); +} + +/// getMax - Get the max backedge taken count for the loop. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + return Max ? Max : SE->getCouldNotCompute(); +} + +/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each +/// computable exit into a persistent ExitNotTakenInfo array. +ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( + SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts, + bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + + if (!Complete) + ExitNotTaken.setIncomplete(); + + unsigned NumExits = ExitCounts.size(); + if (NumExits == 0) return; + + ExitNotTaken.ExitingBlock = ExitCounts[0].first; + ExitNotTaken.ExactNotTaken = ExitCounts[0].second; + if (NumExits == 1) return; + + // Handle the rare case of multiple computable exits. + ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + + ExitNotTakenInfo *PrevENT = &ExitNotTaken; + for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { + PrevENT->setNextExit(ENT); + ENT->ExitingBlock = ExitCounts[i].first; + ENT->ExactNotTaken = ExitCounts[i].second; + } +} + +/// clear - Invalidate this result and free the ExitNotTakenInfo array. +void ScalarEvolution::BackedgeTakenInfo::clear() { + ExitNotTaken.ExitingBlock = 0; + ExitNotTaken.ExactNotTaken = 0; + delete[] ExitNotTaken.getNextExit(); +} + /// ComputeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo @@ -3992,38 +4239,31 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. 
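
Picking the most conservative values across exits means the exact count is the unsigned minimum of the per-exit limits, which is what getExact above computes: whichever exit fires first bounds how often the backedge can run. A concrete two-exit illustration (a small sketch, not LLVM code):

    // A loop with two exits takes its backedge min(limit1, limit2) times.
    #include <algorithm>
    #include <cassert>
    int main() {
      unsigned Taken = 0;
      for (unsigned i = 0; i != 8; ++i) {   // this exit alone allows 8 backedges
        if (i == 3) break;                  // this exit alone allows 3 backedges
        ++Taken;                            // completed iterations == backedges taken
      }
      assert(Taken == std::min(8u, 3u));    // the exact count is the umin, here 3
      return 0;
    }
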
- const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - bool CouldNotComputeBECount = false; + bool CouldComputeBECount = true; + SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BackedgeTakenInfo NewBTI = - ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); - - if (NewBTI.Exact == getCouldNotCompute()) { + ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. - CouldNotComputeBECount = true; - BECount = getCouldNotCompute(); - } else if (!CouldNotComputeBECount) { - if (BECount == getCouldNotCompute()) - BECount = NewBTI.Exact; - else - BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); - } + CouldComputeBECount = false; + else + ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); + if (MaxBECount == getCouldNotCompute()) - MaxBECount = NewBTI.Max; - else if (NewBTI.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); + MaxBECount = EL.Max; + else if (EL.Max != getCouldNotCompute()) + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); } - return BackedgeTakenInfo(BECount, MaxBECount); + return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } -/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge -/// of the specified loop will execute if it exits via the specified block. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, - BasicBlock *ExitingBlock) { +/// ComputeExitLimit - Compute the number of times the backedge of the specified +/// loop will execute if it exits via the specified block. +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Okay, we've chosen an exiting block. See what condition causes us to // exit at this block. @@ -4081,95 +4321,91 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, } // Proceed to the next level to examine the exit condition expression. - return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1)); + return ComputeExitLimitFromCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); } -/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// ComputeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, - Value *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. 
- BackedgeTakenInfo BTI0 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); - BackedgeTakenInfo BTI1 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. - if (BTI0.Exact == getCouldNotCompute() || - BTI1.Exact == getCouldNotCompute()) + if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == getCouldNotCompute()) - MaxBECount = BTI1.Max; - else if (BTI1.Max == getCouldNotCompute()) - MaxBECount = BTI0.Max; + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; else - MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. assert(L->contains(FBB) && "Loop block has no successor in loop!"); - if (BTI0.Max == BTI1.Max) - MaxBECount = BTI0.Max; - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; } - return BackedgeTakenInfo(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - BackedgeTakenInfo BTI0 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); - BackedgeTakenInfo BTI1 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. - if (BTI0.Exact == getCouldNotCompute() || - BTI1.Exact == getCouldNotCompute()) + if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == getCouldNotCompute()) - MaxBECount = BTI1.Max; - else if (BTI1.Max == getCouldNotCompute()) - MaxBECount = BTI0.Max; + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; else - MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. 
assert(L->contains(TBB) && "Loop block has no successor in loop!"); - if (BTI0.Max == BTI1.Max) - MaxBECount = BTI0.Max; - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; } - return BackedgeTakenInfo(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) - return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4185,17 +4421,17 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, } // If it's not an integer or pointer comparison then compute it the hard way. - return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } -/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// ComputeExitLimitFromICmp - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, - ICmpInst *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4207,8 +4443,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - BackedgeTakenInfo ItCnt = - ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); + ExitLimit ItCnt = + ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); if (ItCnt.hasAnyInfo()) return ItCnt; } @@ -4247,36 +4483,36 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) - BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SLT: { - BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, true); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: { - BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), getNotSCEV(RHS), L, true); - if (BTI.hasAnyInfo()) return BTI; + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_ULT: { - 
BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, false); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_UGT: { - BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), getNotSCEV(RHS), L, false); - if (BTI.hasAnyInfo()) return BTI; + if (EL.hasAnyInfo()) return EL; break; } default: @@ -4290,8 +4526,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, #endif break; } - return - ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } static ConstantInt * @@ -4321,10 +4556,10 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, if (Idx >= CA->getNumOperands()) return 0; // Bogus program Init = cast<Constant>(CA->getOperand(Idx)); } else if (isa<ConstantAggregateZero>(Init)) { - if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { assert(Idx < STy->getNumElements() && "Bad struct index!"); Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { if (Idx >= ATy->getNumElements()) return 0; // Bogus program Init = Constant::getNullValue(ATy->getElementType()); } else { @@ -4338,15 +4573,16 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, return Init; } -/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of +/// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( - LoadInst *LI, - Constant *RHS, - const Loop *L, - ICmpInst::Predicate predicate) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeLoadConstantCompareExitLimit( + LoadInst *LI, + Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. @@ -4431,69 +4667,117 @@ static bool CanConstantFold(const Instruction *I) { return false; } -/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node -/// in the loop that V is derived from. We allow arbitrary operations along the -/// way, but the operands of an operation must either be constants or a value -/// derived from a constant PHI. If this expression does not fit with these -/// constraints, return null. -static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { - // If this is not an instruction, or if this is an instruction outside of the - // loop, it can't be derived from a loop PHI. - Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !L->contains(I)) return 0; +/// Determine whether this instruction can constant evolve within this loop +/// assuming its operands can all constant evolve. +static bool canConstantEvolve(Instruction *I, const Loop *L) { + // An instruction outside of the loop can't be derived from a loop PHI. 
+ if (!L->contains(I)) return false; - if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(I)) { if (L->getHeader() == I->getParent()) - return PN; + return true; else // We don't currently keep track of the control flow needed to evaluate // PHIs, so we cannot handle PHIs inside of loops. - return 0; + return false; } // If we won't be able to constant fold this expression even if the operands - // are constants, return early. - if (!CanConstantFold(I)) return 0; + // are constants, bail early. + return CanConstantFold(I); +} + +/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by +/// recursing through each instruction operand until reaching a loop header phi. +static PHINode * +getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, + DenseMap<Instruction *, PHINode *> &PHIMap) { // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = 0; - for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) - if (!isa<Constant>(I->getOperand(Op))) { - PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); - if (P == 0) return 0; // Not evolving from PHI - if (PHI == 0) - PHI = P; - else if (PHI != P) - return 0; // Evolving from multiple different PHIs. + for (Instruction::op_iterator OpI = UseInst->op_begin(), + OpE = UseInst->op_end(); OpI != OpE; ++OpI) { + + if (isa<Constant>(*OpI)) continue; + + Instruction *OpInst = dyn_cast<Instruction>(*OpI); + if (!OpInst || !canConstantEvolve(OpInst, L)) return 0; + + PHINode *P = dyn_cast<PHINode>(OpInst); + if (!P) + // If this operand is already visited, reuse the prior result. + // We may have P != PHI if this is the deepest point at which the + // inconsistent paths meet. + P = PHIMap.lookup(OpInst); + if (!P) { + // Recurse and memoize the results, whether a phi is found or not. + // This recursive call invalidates pointers into PHIMap. + P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); + PHIMap[OpInst] = P; } - + if (P == 0) return 0; // Not evolving from PHI + if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs. + PHI = P; + } // This is a expression evolving from a constant PHI! return PHI; } +/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node +/// in the loop that V is derived from. We allow arbitrary operations along the +/// way, but the operands of an operation must either be constants or a value +/// derived from a constant PHI. If this expression does not fit with these +/// constraints, return null. +static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || !canConstantEvolve(I, L)) return 0; + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + return PN; + } + + // Record non-constant instructions contained by the loop. + DenseMap<Instruction *, PHINode *> PHIMap; + return getConstantEvolvingPHIOperands(I, L, PHIMap); +} + /// EvaluateExpression - Given an expression that passes the /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node /// in the loop has the value PHIVal. If we can't fold this expression for some /// reason, return null. -static Constant *EvaluateExpression(Value *V, Constant *PHIVal, +static Constant *EvaluateExpression(Value *V, const Loop *L, + DenseMap<Instruction *, Constant *> &Vals, const TargetData *TD) { - if (isa<PHINode>(V)) return PHIVal; + // Convenient constant check, but redundant for recursive calls. 
if (Constant *C = dyn_cast<Constant>(V)) return C; + Instruction *I = cast<Instruction>(V); + if (Constant *C = Vals.lookup(I)) return C; + + assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant"); + assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop"); + (void)L; std::vector<Constant*> Operands(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); - if (Operands[i] == 0) return 0; + Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); + if (!Operand) { + Operands[i] = dyn_cast<Constant>(I->getOperand(i)); + if (!Operands[i]) return 0; + continue; + } + Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Vals[Operand] = C; + if (!C) return 0; + Operands[i] = C; } if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4514,6 +4798,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; + // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis. + DenseMap<Instruction *, Constant *> CurrentIterVals; + // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. @@ -4522,6 +4809,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); if (StartCST == 0) return RetVal = 0; // Must be a constant. + CurrentIterVals[PN] = StartCST; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); if (getConstantEvolvingPHI(BEValue, L) != PN && @@ -4534,29 +4822,31 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; - for (Constant *PHIVal = StartCST; ; ++IterationNum) { + for (; ; ++IterationNum) { if (IterationNum == NumIterations) - return RetVal = PHIVal; // Got exit value! + return RetVal = CurrentIterVals[PN]; // Got exit value! // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); - if (NextPHI == PHIVal) + // EvaluateExpression adds non-phi values to the CurrentIterVals map. + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + if (NextPHI == CurrentIterVals[PN]) return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! - PHIVal = NextPHI; + DenseMap<Instruction *, Constant *> NextIterVals; + NextIterVals[PN] = NextPHI; + CurrentIterVals.swap(NextIterVals); } } -/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a +/// ComputeExitCountExhaustively - If the loop is known to execute a /// constant number of times (the condition evolves only from constants), /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return getCouldNotCompute(). 
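A note on the evaluation strategy above, as a minimal standalone sketch (the toy Node type and helper names are hypothetical, not the LLVM API): fold a loop-carried expression one iteration at a time by seeding the header phi's value into a per-iteration map and memoizing shared sub-expressions, so each node is computed once per iteration, mirroring how CurrentIterVals is seeded and carried forward.

  #include <cassert>
  #include <cstdint>
  #include <unordered_map>

  // A toy expression node: a literal constant, the loop-header phi, or an add.
  struct Node {
    enum Kind { Const, HeaderPhi, Add } K;
    int64_t Val = 0;                        // used when K == Const
    const Node *L = nullptr, *R = nullptr;  // used when K == Add
  };

  // Evaluate one iteration, memoizing results in Vals (the per-iteration map analogue).
  static int64_t evalOnce(const Node *N,
                          std::unordered_map<const Node *, int64_t> &Vals) {
    auto It = Vals.find(N);
    if (It != Vals.end())
      return It->second;                    // already computed this iteration
    assert(N->K != Node::HeaderPhi && "header phi must be seeded before evaluation");
    int64_t V = (N->K == Node::Const) ? N->Val
                                      : evalOnce(N->L, Vals) + evalOnce(N->R, Vals);
    Vals[N] = V;
    return V;
  }

  // Advance the phi through NumIterations evaluations of the backedge expression.
  static int64_t runIterations(const Node *Phi, const Node *BackedgeExpr,
                               int64_t Start, unsigned NumIterations) {
    int64_t Cur = Start;
    for (unsigned I = 0; I != NumIterations; ++I) {
      std::unordered_map<const Node *, int64_t> Vals;
      Vals[Phi] = Cur;                      // seed the header phi for this iteration
      Cur = evalOnce(BackedgeExpr, Vals);
    }
    return Cur;
  }

The design point is the same as in the diff: resetting the map each iteration keeps the evaluation exact, while the memoization keeps the per-iteration cost linear in the number of distinct instructions.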
-const SCEV * -ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, - Value *Cond, - bool ExitWhen) { +const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); @@ -4583,8 +4873,10 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. for (Constant *PHIVal = StartCST; IterationNum != MaxIterations; ++IterationNum) { + DenseMap<Instruction *, Constant *> PHIValMap; + PHIValMap[PN] = PHIVal; ConstantInt *CondVal = - dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD)); + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4595,7 +4887,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, } // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); + Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); if (NextPHI == 0 || NextPHI == PHIVal) return getCouldNotCompute();// Couldn't evaluate or not making progress... PHIVal = NextPHI; @@ -4703,7 +4995,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Operands[0], Operands[1], TD); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), TD); + Operands, TD); if (!C) return V; return getSCEV(C); } @@ -4925,7 +5217,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { // Compute the two solutions for the quadratic formula. // The divisions must be performed as signed divisions. APInt NegB(-B); - APInt TwoA( A << 1 ); + APInt TwoA(A << 1); if (TwoA.isMinValue()) { const SCEV *CNC = SE.getCouldNotCompute(); return std::make_pair(CNC, CNC); @@ -4940,7 +5232,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(SE.getConstant(Solution1), SE.getConstant(Solution2)); - } // end APIntOps namespace + } // end APIntOps namespace } /// HowFarToZero - Return the number of times a backedge comparing the specified @@ -4950,7 +5242,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// now expressed as a single expression, V = x-y. So the exit test is /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. -ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ExitLimit ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { @@ -5034,8 +5326,19 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // Handle unitary steps, which cannot wraparound. // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) - if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) - return Distance; + if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { + ConstantRange CR = getUnsignedRange(Start); + const SCEV *MaxBECount; + if (!CountDown && CR.getUnsignedMin().isMinValue()) + // When counting up, the worst starting value is 1, not 0. + MaxBECount = CR.getUnsignedMax().isMinValue() + ? 
getConstant(APInt::getMinValue(CR.getBitWidth())) + : getConstant(APInt::getMaxValue(CR.getBitWidth())); + else + MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() + : -CR.getUnsignedMin()); + return ExitLimit(Distance, MaxBECount); + } // If the recurrence is known not to wraparound, unsigned divide computes the // back edge count. We know that the value will either become zero (and thus @@ -5062,7 +5365,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ExitLimit ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the @@ -5741,7 +6044,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, assert(!isKnownNegative(Step) && "This code doesn't handle negative strides yet!"); - const Type *Ty = Start->getType(); + Type *Ty = Start->getType(); // When Start == End, we have an exact BECount == 0. Short-circuit this case // here because SCEV may not be able to determine that the unsigned division @@ -5760,7 +6063,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, if (!NoWrap) { // Check Add for unsigned overflow. // TODO: More sophisticated things could be done here. - const Type *WideTy = IntegerType::get(getContext(), + Type *WideTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + 1); const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); @@ -5775,7 +6078,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, /// HowManyLessThans - Return the number of times a backedge containing the /// specified less-than comparison will execute. If not computable, return /// CouldNotCompute. -ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned) { // Only handle: "ADDREC < LoopInvariant". @@ -5882,7 +6185,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; - return BackedgeTakenInfo(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount); } return getCouldNotCompute(); @@ -6090,6 +6393,15 @@ void ScalarEvolution::releaseMemory() { FirstUnknown = 0; ValueExprMap.clear(); + + // Free any extra memory created for ExitNotTakenInfo in the unlikely event + // that a loop had multiple computable exits. 
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); + I != E; ++I) { + I->second.clear(); + } + BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index befe6d2..47f0f32 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" @@ -26,7 +27,7 @@ using namespace llvm; /// reusing an existing cast if a suitable one exists, moving an existing /// cast if a suitable one exists but isn't in the right place, or /// creating a new one. -Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, +Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { // Check to see if there is already a cast! @@ -62,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. -Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); assert((Op == Instruction::BitCast || Op == Instruction::PtrToInt || @@ -103,7 +104,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { while ((isa<BitCastInst>(IP) && isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && cast<BitCastInst>(IP)->getOperand(0) != A) || - isa<DbgInfoIntrinsic>(IP)) + isa<DbgInfoIntrinsic>(IP) || + isa<LandingPadInst>(IP)) ++IP; return ReuseOrCreateCast(A, Ty, Op, IP); } @@ -113,7 +115,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP; + while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) || + isa<LandingPadInst>(IP)) + ++IP; return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -160,7 +164,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } // If we haven't found this binop, insert it. - Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp")); + Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); BO->setDebugLoc(SaveInsertPt->getDebugLoc()); rememberInstruction(BO); @@ -277,7 +281,7 @@ static bool FactorOutConstant(const SCEV *&S, /// the list. /// static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { unsigned NumAddRecs = 0; for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) @@ -306,7 +310,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, /// into GEP indices. /// static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { // Find the addrecs. 
SmallVector<const SCEV *, 8> AddRecs; @@ -365,10 +369,10 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, /// Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end, - const PointerType *PTy, - const Type *Ty, + PointerType *PTy, + Type *Ty, Value *V) { - const Type *ElTy = PTy->getElementType(); + Type *ElTy = PTy->getElementType(); SmallVector<Value *, 4> GepIndices; SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -423,7 +427,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, GepIndices.push_back(Scaled); // Collect struct field index operands. - while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + while (StructType *STy = dyn_cast<StructType>(ElTy)) { bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; @@ -451,7 +455,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // appropriate struct type. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { - const Type *CTy; + Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { GepIndices.push_back(FieldNo); @@ -474,7 +478,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } - if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) ElTy = ATy->getElementType(); else break; @@ -494,7 +498,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + return ConstantExpr::getGetElementPtr(CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -572,8 +576,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); Value *GEP = Builder.CreateGEP(Casted, - GepIndices.begin(), - GepIndices.end(), + GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); @@ -691,7 +694,7 @@ public: } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); // Collect all the add operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal, and @@ -717,7 +720,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // This is the first operand. Just expand it. Sum = expand(Op); ++I; - } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { // The running sum expression is a pointer. Try to form a getelementptr // at this level with that as the base. SmallVector<const SCEV *, 4> NewOps; @@ -731,7 +734,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { NewOps.push_back(X); } Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); - } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { + } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { // The running sum is an integer, and there's a pointer at this level. // Try to form a getelementptr. If the running sum is instructions, // use a SCEVUnknown to avoid re-analyzing them. 
@@ -762,7 +765,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); // Collect all the mul operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal. @@ -804,7 +807,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { @@ -841,81 +844,141 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, } } +/// Determine if this is a well-behaved chain of instructions leading back to +/// the PHI. If so, it may be reused by expanded expressions. +bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) + return false; + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + if (IncV != PN) + return true; + + return isNormalAddRecExprPHI(PN, IncV, L); +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + switch (IncV->getOpcode()) { + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: + return IncV->getOperand(0) == PN + && L->isLoopInvariant(IncV->getOperand(1)); + case Instruction::BitCast: + IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0)); + if (!IncV) + return false; + // fall-thru to GEP handling + case Instruction::GetElementPtr: { + // This must be a pointer addition of constants (pretty) or some number of + // address-size elements (ugly). + for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); + I != E; ++I) { + if (isa<Constant>(*I)) + continue; + // ugly geps have 2 operands. + // i1* is used by the expander to represent an address-size element. + if (IncV->getNumOperands() != 2) + return false; + unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return false; + // Ensure the operands dominate the insertion point. I don't know of a + // case when this would not be true, so this is somewhat untested. 
+ if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + break; + } + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (IncV && IncV->getOpcode() == Instruction::BitCast) + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + return IncV == PN; + } + default: + return false; + } +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, - const Type *ExpandTy, - const Type *IntTy) { + Type *ExpandTy, + Type *IntTy) { assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == - SE.getEffectiveSCEVType(Normalized->getType())) && - SE.getSCEV(PN) == Normalized) - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - Instruction *IncV = - cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); - - // Determine if this is a well-behaved chain of instructions leading - // back to the PHI. It probably will be, if we're scanning an inner - // loop already visited by LSR for example, but it wouldn't have - // to be. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (!SE.isSCEVable(PN->getType()) || + (SE.getEffectiveSCEVType(PN->getType()) != + SE.getEffectiveSCEVType(Normalized->getType())) || + SE.getSCEV(PN) != Normalized) + continue; + + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + if (LSRMode) { + if (!isExpandedAddRecExprPHI(PN, IncV, L)) + continue; + } + else { + if (!isNormalAddRecExprPHI(PN, IncV, L)) + continue; + } + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + rememberInstruction(IncV); + if (L == IVIncInsertLoop) do { - if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || - (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) { - IncV = 0; + if (SE.DT->dominates(IncV, IVIncInsertPos)) break; - } - // If any of the operands don't dominate the insert position, bail. - // Addrec operands are always loop-invariant, so this can only happen - // if there are instructions which haven't been hoisted. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) { - IncV = 0; - break; - } - } - if (!IncV) - break; - // Advance to the next instruction. - IncV = dyn_cast<Instruction>(IncV->getOperand(0)); - if (!IncV) - break; - if (IncV->mayHaveSideEffects()) { - IncV = 0; - break; - } + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); } while (IncV != PN); - - if (IncV) { - // Ok, the add recurrence looks usable. 
- // Remember this PHI, even in post-inc mode. - InsertedValues.insert(PN); - // Remember the increment. - IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); - rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast<Instruction>(IncV->getOperand(0)); - } while (IncV != PN); - return PN; - } - } + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -969,7 +1032,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Value *IncV; // If the PHI is a pointer, use a GEP, otherwise use an add or sub. if (isPointer) { - const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); // If the step isn't constant, don't use an implicitly scaled GEP, because // that would require a multiply inside the loop. if (!isa<ConstantInt>(StepV)) @@ -978,7 +1041,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp"); + IncV = Builder.CreateBitCast(IncV, PN->getType()); rememberInstruction(IncV); } } else { @@ -1001,8 +1064,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, } Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { - const Type *STy = S->getType(); - const Type *IntTy = SE.getEffectiveSCEVType(STy); + Type *STy = S->getType(); + Type *IntTy = SE.getEffectiveSCEVType(STy); const Loop *L = S->getLoop(); // Determine a normalized form of this expression, which is the expression @@ -1045,7 +1108,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. - const Type *ExpandTy = PostLoopScale ? IntTy : STy; + Type *ExpandTy = PostLoopScale ? IntTy : STy; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); // Accommodate post-inc mode, if necessary. @@ -1057,6 +1120,14 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); + + // For an expansion to use the postinc form, the client must call + // expandCodeFor with an InsertPoint that is either outside the PostIncLoop + // or dominated by IVIncInsertPos. + assert((!isa<Instruction>(Result) || + SE.DT->dominates(cast<Instruction>(Result), + Builder.GetInsertPoint())) && + "postinc expansion does not dominate use"); } // Re-apply any non-loop-dominating scale. @@ -1069,7 +1140,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating offset. 
if (PostLoopOffset) { - if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { const SCEV *const OffsetArray[1] = { PostLoopOffset }; Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); } else { @@ -1086,7 +1157,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!CanonicalMode) return expandAddRecExprLiterally(S); - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. @@ -1110,7 +1181,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast<Instruction>(V))); - while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt)) + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || + isa<LandingPadInst>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); @@ -1132,7 +1204,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Dig into the expression to find the pointer base for a GEP. ExposePointerBase(Base, RestArray[0], SE); // If we found a pointer, expand the AddRec with a GEP. - if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { // Make sure the Base isn't something exotic, such as a multiplied // or divided pointer value. In those cases, the result type isn't // actually a pointer type. @@ -1216,35 +1288,35 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateTrunc(V, Ty, "tmp"); + Value *I = Builder.CreateTrunc(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateZExt(V, Ty, "tmp"); + Value *I = Builder.CreateZExt(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateSExt(V, Ty, "tmp"); + Value *I = Builder.CreateSExt(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. 
@@ -1253,7 +1325,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); rememberInstruction(Sel); @@ -1268,7 +1340,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. @@ -1277,7 +1349,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); rememberInstruction(Sel); @@ -1290,7 +1362,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) { BasicBlock::iterator IP = I; while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) @@ -1299,7 +1371,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, return expandCodeFor(SH, Ty); } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { @@ -1325,7 +1397,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstNonPHI(); + InsertPt = L->getHeader()->getFirstInsertionPt(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; @@ -1346,8 +1418,12 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. - if (PostIncLoops.empty()) - InsertedExpressions[std::make_pair(S, InsertPt)] = V; + // + // This is independent of PostIncLoops. The mapped value simply materializes + // the expression at this insertion point. If the mapped value happened to be + // a postinc expansion, it could be reused by a non postinc user, but only if + // its insertion point was already at the head of the loop. + InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; @@ -1384,7 +1460,7 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { /// starts at zero and steps by one on each iteration. PHINode * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, - const Type *Ty) { + Type *Ty) { assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); // Build a SCEV for {0,+,1}<L>. 
@@ -1401,3 +1477,102 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } + +/// hoistStep - Attempt to hoist an IV increment above a potential use. +/// +/// To successfully hoist, two criteria must be met: +/// - IncV operands dominate InsertPos and +/// - InsertPos dominates IncV +/// +/// Meeting the second condition means that we don't need to check all of IncV's +/// existing uses (it's moving up in the domtree). +/// +/// This does not yet recursively hoist the operands, although that would +/// not be difficult. +/// +/// This does not require a SCEVExpander instance and could be replaced by a +/// general code-insertion helper. +bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, + const DominatorTree *DT) { + if (DT->dominates(IncV, InsertPos)) + return true; + + if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + // Attempt to hoist IncV + for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); + OI != OE; ++OI) { + Instruction *OInst = dyn_cast<Instruction>(OI); + if (OInst && !DT->dominates(OInst, InsertPos)) + return false; + } + IncV->moveBefore(InsertPos); + return true; +} + +/// replaceCongruentIVs - Check for congruent phis in this loop header and +/// replace them with their most canonical representative. Return the number of +/// phis eliminated. +/// +/// This does not depend on any SCEVExpander state but should be used in +/// the same context that SCEVExpander is used. +unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakVH> &DeadInsts) { + unsigned NumElim = 0; + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE.isSCEVable(Phi->getType())) + continue; + + PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; + if (!OrigPhiRef) { + OrigPhiRef = Phi; + continue; + } + + // If one phi derives from the other via GEPs, types may differ. + // We could consider adding a bitcast here to handle it. + if (OrigPhiRef->getType() != Phi->getType()) + continue; + + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + + // If this phi is more canonical, swap it with the original. + if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) + && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + std::swap(OrigPhiRef, Phi); + std::swap(OrigInc, IsomorphicInc); + } + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's often the head of an IV user cycle that + // is isomorphic with the original phi. So it's worth eagerly cleaning up + // the common case of a single IV increment. 
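To make the payoff of replaceCongruentIVs concrete, a hypothetical source-level picture of the redundancy it targets (the function names sumBoth and sumOne are illustrative only): two phis with identical start and step, i.e. the same SCEV, where one phi and its increment can be dropped and its users rewritten in terms of the survivor.

  // Before: I and J are congruent induction variables (same start, same step).
  int sumBoth(const int *A, int N) {
    int S = 0;
    for (int I = 0, J = 0; I < N; ++I, ++J)
      S += A[J];
    return S;
  }

  // After eliminating the congruent phi, one IV and one increment remain.
  int sumOne(const int *A, int N) {
    int S = 0;
    for (int I = 0; I < N; ++I)
      S += A[I];
    return S;
  }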
+ if (OrigInc != IsomorphicInc && + OrigInc->getType() == IsomorphicInc->getType() && + SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && + hoistStep(OrigInc, IsomorphicInc, DT)) { + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + IsomorphicInc->replaceAllUsesWith(OrigInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElim; + Phi->replaceAllUsesWith(OrigPhiRef); + DeadInsts.push_back(Phi); + } + return NumElim; +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index 60e630a..c66ecd6 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -60,20 +60,40 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, return true; } -const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT) { - if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) - return S; +namespace { + +/// Hold the state used during post-inc expression transformation, including a +/// map of transformed expressions. +class PostIncTransform { + TransformKind Kind; + PostIncLoopSet &Loops; + ScalarEvolution &SE; + DominatorTree &DT; + + DenseMap<const SCEV*, const SCEV*> Transformed; + +public: + PostIncTransform(TransformKind kind, PostIncLoopSet &loops, + ScalarEvolution &se, DominatorTree &dt): + Kind(kind), Loops(loops), SE(se), DT(dt) {} + + const SCEV *TransformSubExpr(const SCEV *S, Instruction *User, + Value *OperandValToReplace); + +protected: + const SCEV *TransformImpl(const SCEV *S, Instruction *User, + Value *OperandValToReplace); +}; + +} // namespace + +/// Implement post-inc transformation for all valid expression types. +const SCEV *PostIncTransform:: +TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { const SCEV *O = X->getOperand(); - const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, - Loops, SE, DT); + const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); if (O != N) switch (S->getSCEVType()) { case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); @@ -93,9 +113,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { - const SCEV *O = *I; - const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); - Operands.push_back(N); + Operands.push_back(TransformSubExpr(*I, LUser, 0)); } // Conservatively use AnyWrap until/unless we need FlagNW. 
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); @@ -104,8 +122,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, case NormalizeAutodetect: if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { const SCEV *TransformedStep = - TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), - User, OperandValToReplace, Loops, SE, DT); + TransformSubExpr(AR->getStepRecurrence(SE), + User, OperandValToReplace); Result = SE.getMinusSCEV(Result, TransformedStep); Loops.insert(L); } @@ -114,24 +132,20 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, // sometimes fails to canonicalize two equal SCEVs to exactly the same // form. It's possibly a pessimization when this happens, but it isn't a // correctness problem, so disable this assert for now. - assert(S == TransformForPostIncUse(Denormalize, Result, - User, OperandValToReplace, - Loops, SE, DT) && + assert(S == TransformSubExpr(Result, User, OperandValToReplace) && "SCEV normalization is not invertible!"); #endif break; case Normalize: if (Loops.count(L)) { const SCEV *TransformedStep = - TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), - User, OperandValToReplace, Loops, SE, DT); + TransformSubExpr(AR->getStepRecurrence(SE), + User, OperandValToReplace); Result = SE.getMinusSCEV(Result, TransformedStep); } #if 0 // See the comment on the assert above. - assert(S == TransformForPostIncUse(Denormalize, Result, - User, OperandValToReplace, - Loops, SE, DT) && + assert(S == TransformSubExpr(Result, User, OperandValToReplace) && "SCEV normalization is not invertible!"); #endif break; @@ -150,8 +164,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); I != E; ++I) { const SCEV *O = *I; - const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, - Loops, SE, DT); + const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); Changed |= N != O; Operands.push_back(N); } @@ -170,10 +183,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { const SCEV *LO = X->getLHS(); const SCEV *RO = X->getRHS(); - const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, - Loops, SE, DT); - const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, - Loops, SE, DT); + const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace); + const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace); if (LO != LN || RO != RN) return SE.getUDivExpr(LN, RN); return S; @@ -182,3 +193,33 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, llvm_unreachable("Unexpected SCEV kind!"); return 0; } + +/// Manage recursive transformation across an expression DAG. Revisiting +/// expressions would lead to exponential recursion. +const SCEV *PostIncTransform:: +TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { + + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + + const SCEV *Result = Transformed.lookup(S); + if (Result) + return Result; + + Result = TransformImpl(S, User, OperandValToReplace); + Transformed[S] = Result; + return Result; +} + +/// Top level driver for transforming an expression DAG into its requested +/// post-inc form (either "Normalized" or "Denormalized". 
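The TransformSubExpr wrapper above is the standard memoized-rewrite shape. A generic sketch of why caching by node pointer matters (rewriteMemoized and NodeT are hypothetical names): a subexpression shared along many paths of the DAG is rewritten once instead of once per path, keeping the recursion linear in the number of distinct nodes rather than exponential in the depth.

  #include <unordered_map>

  // Rewrite a DAG node through a cache; Rewrite may recurse back into this helper.
  template <typename NodeT, typename RewriteFn>
  const NodeT *rewriteMemoized(const NodeT *N, RewriteFn Rewrite,
                               std::unordered_map<const NodeT *, const NodeT *> &Cache) {
    auto It = Cache.find(N);
    if (It != Cache.end())
      return It->second;        // already transformed via another path
    const NodeT *Result = Rewrite(N);
    Cache[N] = Result;
    return Result;
  }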
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + PostIncTransform Transform(Kind, Loops, SE, DT); + return Transform.TransformSubExpr(S, User, OperandValToReplace); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 455c910..4d94f61 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -34,7 +34,7 @@ const unsigned MaxDepth = 6; /// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if /// unknown returns 0). For vector types, returns the element type's bitwidth. -static unsigned getBitWidth(const Type *Ty, const TargetData *TD) { +static unsigned getBitWidth(Type *Ty, const TargetData *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; assert(isa<PointerType>(Ty) && "Expected a pointer type!"); @@ -103,7 +103,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { - const Type *ObjectType = GV->getType()->getElementType(); + Type *ObjectType = GV->getType()->getElementType(); // If the object is defined in the current Module, we'll be giving // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. @@ -268,7 +268,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { - const Type *SrcTy = I->getOperand(0)->getType(); + Type *SrcTy = I->getOperand(0)->getType(); unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint @@ -291,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::BitCast: { - const Type *SrcTy = I->getOperand(0)->getType(); + Type *SrcTy = I->getOperand(0)->getType(); if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) @@ -559,7 +559,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { Value *Index = I->getOperand(i); - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. if (!TD) return; const StructLayout *SL = TD->getStructLayout(STy); @@ -569,7 +569,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, CountTrailingZeros_64(Offset)); } else { // Handle array index arithmetic. - const Type *IndexedTy = GTI.getIndexedType(); + Type *IndexedTy = GTI.getIndexedType(); if (!IndexedTy->isSized()) return; unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; @@ -898,7 +898,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); unsigned TyBits = TD ? 
TD->getTypeSizeInBits(V->getType()->getScalarType()) : Ty->getScalarSizeInBits(); unsigned Tmp, Tmp2; @@ -1078,7 +1078,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); - const Type *T = V->getType(); + Type *T = V->getType(); ConstantInt *CI = dyn_cast<ConstantInt>(V); @@ -1315,11 +1315,11 @@ Value *llvm::isBytewiseValue(Value *V) { // indices from Idxs that should be left out when inserting into the resulting // struct. To is the result struct built so far, new insertvalue instructions // build on that. -static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, +static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVector<unsigned, 10> &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType); + llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -1358,8 +1358,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, return NULL; // Insert the value in the new (sub) aggregrate - return llvm::InsertValueInst::Create(To, V, - ArrayRef<unsigned>(Idxs).slice(IdxSkip), + return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), "tmp", InsertBefore); } @@ -1378,7 +1377,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, Instruction *InsertBefore) { assert(InsertBefore && "Must have someplace to insert!"); - const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), + Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), idx_range); Value *To = UndefValue::get(IndexedType); SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); @@ -1404,7 +1403,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, && "Not looking at a struct or array?"); assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && "Invalid indices for type?"); - const CompositeType *PTy = cast<CompositeType>(V->getType()); + CompositeType *PTy = cast<CompositeType>(V->getType()); if (isa<UndefValue>(V)) return UndefValue::get(ExtractValueInst::getIndexedType(PTy, @@ -1435,9 +1434,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, // %C = insertvalue {i32, i32 } %A, i32 11, 1 // which allows the unused 0,0 element from the nested struct to be // removed. - return BuildSubAggregate(V, - ArrayRef<unsigned>(idx_range.begin(), - req_idx), + return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), InsertBefore); else // We can't handle this without inserting insertvalues @@ -1455,7 +1452,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, // requested (though possibly only partially). Now we recursively look at // the inserted value, passing any remaining indices. 
return FindInsertedValue(I->getInsertedValueOperand(), - ArrayRef<unsigned>(req_idx, idx_range.end()), + makeArrayRef(req_idx, idx_range.end()), InsertBefore); } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { // If we're extracting a value from an aggregrate that was extracted from @@ -1506,7 +1503,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, if (OpC->isZero()) continue; // Handle a struct and array indices which add their offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -1557,8 +1554,8 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, return false; // Make sure the index-ee is a pointer to array of i8. - const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); - const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); + PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); + ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index 3c63106..d0dd986 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -506,6 +506,15 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(deplibs); KEYWORD(datalayout); KEYWORD(volatile); + KEYWORD(atomic); + KEYWORD(unordered); + KEYWORD(monotonic); + KEYWORD(acquire); + KEYWORD(release); + KEYWORD(acq_rel); + KEYWORD(seq_cst); + KEYWORD(singlethread); + KEYWORD(nuw); KEYWORD(nsw); KEYWORD(exact); @@ -549,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readnone); KEYWORD(readonly); KEYWORD(uwtable); + KEYWORD(returns_twice); KEYWORD(inlinehint); KEYWORD(noinline); @@ -559,7 +569,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noredzone); KEYWORD(noimplicitfloat); KEYWORD(naked); - KEYWORD(hotpatch); KEYWORD(nonlazybind); KEYWORD(type); @@ -570,8 +579,16 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); + KEYWORD(umin); + KEYWORD(x); KEYWORD(blockaddress); + + KEYWORD(personality); + KEYWORD(cleanup); + KEYWORD(catch); + KEYWORD(filter); #undef KEYWORD // Keywords for types. 
@@ -624,12 +641,16 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(switch, Switch); INSTKEYWORD(indirectbr, IndirectBr); INSTKEYWORD(invoke, Invoke); + INSTKEYWORD(resume, Resume); INSTKEYWORD(unwind, Unwind); INSTKEYWORD(unreachable, Unreachable); INSTKEYWORD(alloca, Alloca); INSTKEYWORD(load, Load); INSTKEYWORD(store, Store); + INSTKEYWORD(cmpxchg, AtomicCmpXchg); + INSTKEYWORD(atomicrmw, AtomicRMW); + INSTKEYWORD(fence, Fence); INSTKEYWORD(getelementptr, GetElementPtr); INSTKEYWORD(extractelement, ExtractElement); @@ -637,6 +658,7 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(shufflevector, ShuffleVector); INSTKEYWORD(extractvalue, ExtractValue); INSTKEYWORD(insertvalue, InsertValue); + INSTKEYWORD(landingpad, LandingPad); #undef INSTKEYWORD // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by @@ -704,17 +726,17 @@ lltok::Kind LLLexer::Lex0x() { case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) FP80HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(80, 2, Pair)); + APFloatVal = APFloat(APInt(80, Pair)); return lltok::APFloat; case 'L': // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, 2, Pair), true); + APFloatVal = APFloat(APInt(128, Pair), true); return lltok::APFloat; case 'M': // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, 2, Pair)); + APFloatVal = APFloat(APInt(128, Pair)); return lltok::APFloat; } } diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index cfc31f3..cafaab0 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -26,7 +26,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static std::string getTypeString(const Type *T) { +static std::string getTypeString(Type *T) { std::string Result; raw_string_ostream Tmp(Result); Tmp << *T; @@ -120,6 +120,9 @@ bool LLParser::ValidateEndOfModule() { for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + // Upgrade to new EH scheme. N.B. This will go away in 3.1. + UpgradeExceptionHandling(M); + // Check debug info intrinsics. CheckDebugInfoIntrinsics(M); return false; @@ -744,9 +747,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, /// GetGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. -GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, +GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, LocTy Loc) { - const PointerType *PTy = dyn_cast<PointerType>(Ty); + PointerType *PTy = dyn_cast<PointerType>(Ty); if (PTy == 0) { Error(Loc, "global variable reference must have pointer type"); return 0; @@ -775,7 +778,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, // Otherwise, create a new forward reference for this value and remember it. 
GlobalValue *FwdVal; - if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) + if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, @@ -785,8 +788,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, return FwdVal; } -GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { - const PointerType *PTy = dyn_cast<PointerType>(Ty); +GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { + PointerType *PTy = dyn_cast<PointerType>(Ty); if (PTy == 0) { Error(Loc, "global variable reference must have pointer type"); return 0; @@ -813,7 +816,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal; - if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) + if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, @@ -908,6 +911,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break; case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break; case lltok::kw_uwtable: Attrs |= Attribute::UWTable; break; + case lltok::kw_returns_twice: Attrs |= Attribute::ReturnsTwice; break; case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; @@ -919,7 +923,6 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break; case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; - case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_alignstack: { @@ -1145,6 +1148,32 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } +/// ParseScopeAndOrdering +/// if isAtomic: ::= 'singlethread'? AtomicOrdering +/// else: ::= +/// +/// This sets Scope and Ordering to the parsed values. +bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, + AtomicOrdering &Ordering) { + if (!isAtomic) + return false; + + Scope = CrossThread; + if (EatIfPresent(lltok::kw_singlethread)) + Scope = SingleThread; + switch (Lex.getKind()) { + default: return TokError("Expected ordering on atomic instruction"); + case lltok::kw_unordered: Ordering = Unordered; break; + case lltok::kw_monotonic: Ordering = Monotonic; break; + case lltok::kw_acquire: Ordering = Acquire; break; + case lltok::kw_release: Ordering = Release; break; + case lltok::kw_acq_rel: Ordering = AcquireRelease; break; + case lltok::kw_seq_cst: Ordering = SequentiallyConsistent; break; + } + Lex.Lex(); + return false; +} + /// ParseOptionalStackAlignment /// ::= /* empty */ /// ::= 'alignstack' '(' 4 ')' @@ -1237,7 +1266,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). 
if (Entry.first == 0) { - Entry.first = StructType::createNamed(Context, Lex.getStrVal()); + Entry.first = StructType::create(Context, Lex.getStrVal()); Entry.second = Lex.getLoc(); } Result = Entry.first; @@ -1254,7 +1283,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). if (Entry.first == 0) { - Entry.first = StructType::createNamed(Context, ""); + Entry.first = StructType::create(Context); Entry.second = Lex.getLoc(); } Result = Entry.first; @@ -1476,7 +1505,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, // If this type number has never been uttered, create it. if (Entry.first == 0) - Entry.first = StructType::createNamed(Context, Name); + Entry.first = StructType::create(Context, Name); ResultTy = Entry.first; return false; } @@ -1502,7 +1531,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, // If this type number has never been uttered, create it. if (Entry.first == 0) - Entry.first = StructType::createNamed(Context, Name); + Entry.first = StructType::create(Context, Name); StructType *STy = cast<StructType>(Entry.first); @@ -1668,7 +1697,7 @@ bool LLParser::PerFunctionState::FinishFunction() { /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. Value *LLParser::PerFunctionState::GetVal(const std::string &Name, - const Type *Ty, LocTy Loc) { + Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = F.getValueSymbolTable().lookup(Name); @@ -1709,7 +1738,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, return FwdVal; } -Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, +Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; @@ -2273,16 +2302,11 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy()) return Error(ID.Loc, "getelementptr requires pointer operand"); - if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), - (Value**)(Elts.data() + 1), - Elts.size() - 1)) + ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); + if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices)) return Error(ID.Loc, "invalid indices for getelementptr"); - ID.ConstantVal = InBounds ? - ConstantExpr::getInBoundsGetElementPtr(Elts[0], - Elts.data() + 1, - Elts.size() - 1) : - ConstantExpr::getGetElementPtr(Elts[0], - Elts.data() + 1, Elts.size() - 1); + ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices, + InBounds); } else if (Opc == Instruction::Select) { if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to select"); @@ -2323,7 +2347,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } /// ParseGlobalValue - Parse a global value with the specified type. -bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) { +bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) { C = 0; ValID ID; Value *V = NULL; @@ -2410,7 +2434,7 @@ bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) { // Function Parsing. 
//===----------------------------------------------------------------------===// -bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, +bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, PerFunctionState *PFS) { if (Ty->isFunctionTy()) return Error(ID.Loc, "functions are not values, refer to them as pointers"); @@ -2426,8 +2450,8 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = PFS->GetVal(ID.StrVal, Ty, ID.Loc); return (V == 0); case ValID::t_InlineAsm: { - const PointerType *PTy = dyn_cast<PointerType>(Ty); - const FunctionType *FTy = + PointerType *PTy = dyn_cast<PointerType>(Ty); + FunctionType *FTy = PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); @@ -2506,7 +2530,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_ConstantStruct: case ValID::t_PackedConstantStruct: - if (const StructType *ST = dyn_cast<StructType>(Ty)) { + if (StructType *ST = dyn_cast<StructType>(Ty)) { if (ST->getNumElements() != ID.UIntVal) return Error(ID.Loc, "initializer with struct type has wrong # elements"); @@ -2519,15 +2543,15 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "element " + Twine(i) + " of struct initializer doesn't match struct element type"); - V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts, - ID.UIntVal)); + V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts, + ID.UIntVal)); } else return Error(ID.Loc, "constant expression type mismatch"); return false; } } -bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) { +bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { V = 0; ValID ID; return ParseValID(ID, PFS) || @@ -2671,9 +2695,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); - const FunctionType *FT = + FunctionType *FT = FunctionType::get(RetType, ParamTypeList, isVarArg); - const PointerType *PFT = PointerType::getUnqual(FT); + PointerType *PFT = PointerType::getUnqual(FT); Fn = 0; if (!FunctionName.empty()) { @@ -2864,6 +2888,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_switch: return ParseSwitch(Inst, PFS); case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS); case lltok::kw_invoke: return ParseInvoke(Inst, PFS); + case lltok::kw_resume: return ParseResume(Inst, PFS); // Binary Operators. case lltok::kw_add: case lltok::kw_sub: @@ -2923,13 +2948,18 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS); case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS); case lltok::kw_phi: return ParsePHI(Inst, PFS); + case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS); case lltok::kw_call: return ParseCall(Inst, PFS, false); case lltok::kw_tail: return ParseCall(Inst, PFS, true); // Memory. 
case lltok::kw_alloca: return ParseAlloc(Inst, PFS); case lltok::kw_load: return ParseLoad(Inst, PFS, false); case lltok::kw_store: return ParseStore(Inst, PFS, false); + case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS); + case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS); + case lltok::kw_fence: return ParseFence(Inst, PFS); case lltok::kw_volatile: + // For compatibility; canonical location is after load if (EatIfPresent(lltok::kw_load)) return ParseLoad(Inst, PFS, true); else if (EatIfPresent(lltok::kw_store)) @@ -3162,8 +3192,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - const PointerType *PFTy = 0; - const FunctionType *Ty = 0; + PointerType *PFTy = 0; + FunctionType *Ty = 0; if (!(PFTy = dyn_cast<PointerType>(RetType)) || !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { // Pull out the types of all of the arguments... @@ -3194,7 +3224,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { FunctionType::param_iterator I = Ty->param_begin(); FunctionType::param_iterator E = Ty->param_end(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { - const Type *ExpectedTy = 0; + Type *ExpectedTy = 0; if (I != E) { ExpectedTy = *I++; } else if (!Ty->isVarArg()) { @@ -3225,7 +3255,17 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return false; } +/// ParseResume +/// ::= 'resume' TypeAndValue +bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) { + Value *Exn; LocTy ExnLoc; + if (ParseTypeAndValue(Exn, ExnLoc, PFS)) + return true; + ResumeInst *RI = ResumeInst::Create(Exn); + Inst = RI; + return false; +} //===----------------------------------------------------------------------===// // Binary Operators. @@ -3473,6 +3513,56 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { return AteExtraComma ? InstExtraComma : InstNormal; } +/// ParseLandingPad +/// ::= 'landingpad' Type 'personality' TypeAndValue 'cleanup'? Clause+ +/// Clause +/// ::= 'catch' TypeAndValue +/// ::= 'filter' +/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )* +bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { + Type *Ty = 0; LocTy TyLoc; + Value *PersFn; LocTy PersFnLoc; + + if (ParseType(Ty, TyLoc) || + ParseToken(lltok::kw_personality, "expected 'personality'") || + ParseTypeAndValue(PersFn, PersFnLoc, PFS)) + return true; + + LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0); + LP->setCleanup(EatIfPresent(lltok::kw_cleanup)); + + while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){ + LandingPadInst::ClauseType CT; + if (EatIfPresent(lltok::kw_catch)) + CT = LandingPadInst::Catch; + else if (EatIfPresent(lltok::kw_filter)) + CT = LandingPadInst::Filter; + else + return TokError("expected 'catch' or 'filter' clause type"); + + Value *V; LocTy VLoc; + if (ParseTypeAndValue(V, VLoc, PFS)) { + delete LP; + return true; + } + + // A 'catch' type expects a non-array constant. A filter clause expects an + // array constant. 
+ if (CT == LandingPadInst::Catch) { + if (isa<ArrayType>(V->getType())) + Error(VLoc, "'catch' clause has an invalid type"); + } else { + if (!isa<ArrayType>(V->getType())) + Error(VLoc, "'filter' clause has an invalid type"); + } + + LP->addClause(V); + } + + Inst = LP; + return false; +} + /// ParseCall /// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value /// ParameterList OptionalAttrs @@ -3498,8 +3588,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - const PointerType *PFTy = 0; - const FunctionType *Ty = 0; + PointerType *PFTy = 0; + FunctionType *Ty = 0; if (!(PFTy = dyn_cast<PointerType>(RetType)) || !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { // Pull out the types of all of the arguments... @@ -3530,7 +3620,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, FunctionType::param_iterator I = Ty->param_begin(); FunctionType::param_iterator E = Ty->param_end(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { - const Type *ExpectedTy = 0; + Type *ExpectedTy = 0; if (I != E) { ExpectedTy = *I++; } else if (!Ty->isVarArg()) { @@ -3596,34 +3686,85 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { } /// ParseLoad -/// ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)? +/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)? +/// ::= 'load' 'atomic' 'volatile'? TypeAndValue +/// 'singlethread'? AtomicOrdering (',' 'align' i32)? +/// Compatibility: +/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)? int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val; LocTy Loc; unsigned Alignment = 0; bool AteExtraComma = false; + bool isAtomic = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + + if (Lex.getKind() == lltok::kw_atomic) { + if (isVolatile) + return TokError("mixing atomic with old volatile placement"); + isAtomic = true; + Lex.Lex(); + } + + if (Lex.getKind() == lltok::kw_volatile) { + if (isVolatile) + return TokError("duplicate volatile before and after store"); + isVolatile = true; + Lex.Lex(); + } + if (ParseTypeAndValue(Val, Loc, PFS) || + ParseScopeAndOrdering(isAtomic, Scope, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; if (!Val->getType()->isPointerTy() || !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType()) return Error(Loc, "load operand must be a pointer to a first class type"); + if (isAtomic && !Alignment) + return Error(Loc, "atomic load must have explicit non-zero alignment"); + if (Ordering == Release || Ordering == AcquireRelease) + return Error(Loc, "atomic load cannot use Release ordering"); - Inst = new LoadInst(Val, "", isVolatile, Alignment); + Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope); return AteExtraComma ? InstExtraComma : InstNormal; } /// ParseStore -/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? + +/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)? +/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue +/// 'singlethread'? AtomicOrdering (',' 'align' i32)? +/// Compatibility: +/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? 
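The load and store productions above, together with ParseCmpXchg, ParseAtomicRMW and ParseFence further down, describe the new atomic memory operations. A minimal sketch of the forms they accept, using made-up operands such as %ptr and %v (the old 'volatile load' / 'volatile store' spellings remain accepted only for compatibility, as the grammar notes):

  %v   = load atomic i32* %ptr seq_cst, align 4
  store atomic i32 %v, i32* %ptr release, align 4
  %old = cmpxchg i32* %ptr, i32 %v, i32 1 acq_rel
  %cnt = atomicrmw add i32* %ptr, i32 1 monotonic
  fence singlethread acquire

These examples stay within the constraints the parsers enforce: atomic loads and stores need an explicit non-zero alignment, an atomic load may not use release or acq_rel, an atomic store may not use acquire or acq_rel, and cmpxchg, atomicrmw and fence may not be unordered (fence may not be monotonic either).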
int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val, *Ptr; LocTy Loc, PtrLoc; unsigned Alignment = 0; bool AteExtraComma = false; + bool isAtomic = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + + if (Lex.getKind() == lltok::kw_atomic) { + if (isVolatile) + return TokError("mixing atomic with old volatile placement"); + isAtomic = true; + Lex.Lex(); + } + + if (Lex.getKind() == lltok::kw_volatile) { + if (isVolatile) + return TokError("duplicate volatile before and after store"); + isVolatile = true; + Lex.Lex(); + } + if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseScopeAndOrdering(isAtomic, Scope, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -3633,11 +3774,131 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, return Error(Loc, "store operand must be a first class value"); if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType()) return Error(Loc, "stored value and pointer type do not match"); + if (isAtomic && !Alignment) + return Error(Loc, "atomic store must have explicit non-zero alignment"); + if (Ordering == Acquire || Ordering == AcquireRelease) + return Error(Loc, "atomic store cannot use Acquire ordering"); + + Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope); + return AteExtraComma ? InstExtraComma : InstNormal; +} - Inst = new StoreInst(Val, Ptr, isVolatile, Alignment); +/// ParseCmpXchg +/// ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue +/// 'singlethread'? AtomicOrdering +int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { + Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc; + bool AteExtraComma = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + bool isVolatile = false; + + if (EatIfPresent(lltok::kw_volatile)) + isVolatile = true; + + if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after cmpxchg address") || + ParseTypeAndValue(Cmp, CmpLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") || + ParseTypeAndValue(New, NewLoc, PFS) || + ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + return true; + + if (Ordering == Unordered) + return TokError("cmpxchg cannot be unordered"); + if (!Ptr->getType()->isPointerTy()) + return Error(PtrLoc, "cmpxchg operand must be a pointer"); + if (cast<PointerType>(Ptr->getType())->getElementType() != Cmp->getType()) + return Error(CmpLoc, "compare value and pointer type do not match"); + if (cast<PointerType>(Ptr->getType())->getElementType() != New->getType()) + return Error(NewLoc, "new value and pointer type do not match"); + if (!New->getType()->isIntegerTy()) + return Error(NewLoc, "cmpxchg operand must be an integer"); + unsigned Size = New->getType()->getPrimitiveSizeInBits(); + if (Size < 8 || (Size & (Size - 1))) + return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized" + " integer"); + + AtomicCmpXchgInst *CXI = + new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, Scope); + CXI->setVolatile(isVolatile); + Inst = CXI; return AteExtraComma ? InstExtraComma : InstNormal; } +/// ParseAtomicRMW +/// ::= 'atomicrmw' 'volatile'? BinOp TypeAndValue ',' TypeAndValue +/// 'singlethread'? 
AtomicOrdering +int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { + Value *Ptr, *Val; LocTy PtrLoc, ValLoc; + bool AteExtraComma = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + bool isVolatile = false; + AtomicRMWInst::BinOp Operation; + + if (EatIfPresent(lltok::kw_volatile)) + isVolatile = true; + + switch (Lex.getKind()) { + default: return TokError("expected binary operation in atomicrmw"); + case lltok::kw_xchg: Operation = AtomicRMWInst::Xchg; break; + case lltok::kw_add: Operation = AtomicRMWInst::Add; break; + case lltok::kw_sub: Operation = AtomicRMWInst::Sub; break; + case lltok::kw_and: Operation = AtomicRMWInst::And; break; + case lltok::kw_nand: Operation = AtomicRMWInst::Nand; break; + case lltok::kw_or: Operation = AtomicRMWInst::Or; break; + case lltok::kw_xor: Operation = AtomicRMWInst::Xor; break; + case lltok::kw_max: Operation = AtomicRMWInst::Max; break; + case lltok::kw_min: Operation = AtomicRMWInst::Min; break; + case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; + case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + } + Lex.Lex(); // Eat the operation. + + if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after atomicrmw address") || + ParseTypeAndValue(Val, ValLoc, PFS) || + ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + return true; + + if (Ordering == Unordered) + return TokError("atomicrmw cannot be unordered"); + if (!Ptr->getType()->isPointerTy()) + return Error(PtrLoc, "atomicrmw operand must be a pointer"); + if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType()) + return Error(ValLoc, "atomicrmw value and pointer type do not match"); + if (!Val->getType()->isIntegerTy()) + return Error(ValLoc, "atomicrmw operand must be an integer"); + unsigned Size = Val->getType()->getPrimitiveSizeInBits(); + if (Size < 8 || (Size & (Size - 1))) + return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized" + " integer"); + + AtomicRMWInst *RMWI = + new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope); + RMWI->setVolatile(isVolatile); + Inst = RMWI; + return AteExtraComma ? InstExtraComma : InstNormal; +} + +/// ParseFence +/// ::= 'fence' 'singlethread'? AtomicOrdering +int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + return true; + + if (Ordering == Unordered) + return TokError("fence cannot be unordered"); + if (Ordering == Monotonic) + return TokError("fence cannot be monotonic"); + + Inst = new FenceInst(Context, Ordering, Scope); + return InstNormal; +} + /// ParseGetElementPtr /// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { @@ -3663,10 +3924,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { Indices.push_back(Val); } - if (!GetElementPtrInst::getIndexedType(Ptr->getType(), - Indices.begin(), Indices.end())) + if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) return Error(Loc, "invalid getelementptr indices"); - Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end()); + Inst = GetElementPtrInst::Create(Ptr, Indices); if (InBounds) cast<GetElementPtrInst>(Inst)->setIsInBounds(true); return AteExtraComma ? 
InstExtraComma : InstNormal; diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h index 9630657..cbc3c23 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.h +++ b/contrib/llvm/lib/AsmParser/LLParser.h @@ -15,6 +15,7 @@ #define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" +#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -142,8 +143,8 @@ namespace llvm { /// GetGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc); - GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc); + GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc); + GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc); // Helper Routines. bool ParseToken(lltok::Kind T, const char *ErrMsg); @@ -178,6 +179,8 @@ namespace llvm { bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, + AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); @@ -249,8 +252,8 @@ namespace llvm { /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc); - Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc); + Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc); + Value *GetVal(unsigned ID, Type *Ty, LocTy Loc); /// SetInstName - After an instruction is parsed and inserted into its /// basic block, this installs its name. @@ -269,14 +272,14 @@ namespace llvm { BasicBlock *DefineBB(const std::string &Name, LocTy Loc); }; - bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, + bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, PerFunctionState *PFS); - bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS); - bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) { + bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS); + bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) { return ParseValue(Ty, V, &PFS); } - bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, + bool ParseValue(Type *Ty, Value *&V, LocTy &Loc, PerFunctionState &PFS) { Loc = Lex.getLoc(); return ParseValue(Ty, V, &PFS); @@ -310,7 +313,7 @@ namespace llvm { // Constant Parsing. 
bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); - bool ParseGlobalValue(const Type *Ty, Constant *&V); + bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); @@ -344,6 +347,7 @@ namespace llvm { bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); + bool ParseResume(Instruction *&Inst, PerFunctionState &PFS); bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, unsigned OperandType); @@ -356,10 +360,14 @@ namespace llvm { bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); int ParsePHI(Instruction *&I, PerFunctionState &PFS); + bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS); bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); int ParseAlloc(Instruction *&I, PerFunctionState &PFS); int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS); + int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS); + int ParseFence(Instruction *&I, PerFunctionState &PFS); int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index a5f89fc..8f16772 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -53,6 +53,9 @@ namespace lltok { kw_deplibs, kw_datalayout, kw_volatile, + kw_atomic, + kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst, + kw_singlethread, kw_nuw, kw_nsw, kw_exact, @@ -87,6 +90,7 @@ namespace lltok { kw_readnone, kw_readonly, kw_uwtable, + kw_returns_twice, kw_inlinehint, kw_noinline, @@ -97,7 +101,6 @@ namespace lltok { kw_noredzone, kw_noimplicitfloat, kw_naked, - kw_hotpatch, kw_nonlazybind, kw_type, @@ -107,6 +110,9 @@ namespace lltok { kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, kw_ueq, kw_une, + // atomicrmw operations that aren't also instruction keywords. + kw_xchg, kw_nand, kw_max, kw_min, kw_umax, kw_umin, + // Instruction Opcodes (Opcode in UIntVal). 
kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul, kw_udiv, kw_sdiv, kw_fdiv, @@ -118,10 +124,13 @@ namespace lltok { kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast, kw_select, kw_va_arg, - kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, + kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter, + + kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume, kw_unreachable, - kw_alloca, kw_load, kw_store, kw_getelementptr, + kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw, + kw_getelementptr, kw_extractelement, kw_insertelement, kw_shufflevector, kw_extractvalue, kw_insertvalue, kw_blockaddress, diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 24c2994..46565f3 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -107,7 +107,7 @@ static int GetDecodedCastOpcode(unsigned Val) { case bitc::CAST_BITCAST : return Instruction::BitCast; } } -static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { +static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) { switch (Val) { default: return -1; case bitc::BINOP_ADD: @@ -131,6 +131,44 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { } } +static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) { + switch (Val) { + default: return AtomicRMWInst::BAD_BINOP; + case bitc::RMW_XCHG: return AtomicRMWInst::Xchg; + case bitc::RMW_ADD: return AtomicRMWInst::Add; + case bitc::RMW_SUB: return AtomicRMWInst::Sub; + case bitc::RMW_AND: return AtomicRMWInst::And; + case bitc::RMW_NAND: return AtomicRMWInst::Nand; + case bitc::RMW_OR: return AtomicRMWInst::Or; + case bitc::RMW_XOR: return AtomicRMWInst::Xor; + case bitc::RMW_MAX: return AtomicRMWInst::Max; + case bitc::RMW_MIN: return AtomicRMWInst::Min; + case bitc::RMW_UMAX: return AtomicRMWInst::UMax; + case bitc::RMW_UMIN: return AtomicRMWInst::UMin; + } +} + +static AtomicOrdering GetDecodedOrdering(unsigned Val) { + switch (Val) { + case bitc::ORDERING_NOTATOMIC: return NotAtomic; + case bitc::ORDERING_UNORDERED: return Unordered; + case bitc::ORDERING_MONOTONIC: return Monotonic; + case bitc::ORDERING_ACQUIRE: return Acquire; + case bitc::ORDERING_RELEASE: return Release; + case bitc::ORDERING_ACQREL: return AcquireRelease; + default: // Map unknown orderings to sequentially-consistent. + case bitc::ORDERING_SEQCST: return SequentiallyConsistent; + } +} + +static SynchronizationScope GetDecodedSynchScope(unsigned Val) { + switch (Val) { + case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread; + default: // Map unknown scopes to cross-thread. 
+ case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread; + } +} + namespace llvm { namespace { /// @brief A class for maintaining the slot number definition @@ -142,7 +180,7 @@ namespace { void *operator new(size_t s) { return User::operator new(s, 1); } - explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context) + explicit ConstantPlaceHolder(Type *Ty, LLVMContext& Context) : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) { Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); } @@ -198,7 +236,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, - const Type *Ty) { + Type *Ty) { if (Idx >= size()) resize(Idx + 1); @@ -213,7 +251,7 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, return C; } -Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) { +Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) { if (Idx >= size()) resize(Idx + 1); @@ -362,7 +400,7 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { // If we have a forward reference, the only possible case is when it is to a // named struct. Just create a placeholder for now. - return TypeList[ID] = StructType::createNamed(Context, ""); + return TypeList[ID] = StructType::create(Context); } /// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable. @@ -630,7 +668,7 @@ bool BitcodeReader::ParseTypeTableBody() { Res->setName(TypeName); TypeList[NumRecords] = 0; } else // Otherwise, create a new struct. - Res = StructType::createNamed(Context, TypeName); + Res = StructType::create(Context, TypeName); TypeName.clear(); SmallVector<Type*, 8> EltTys; @@ -659,7 +697,7 @@ bool BitcodeReader::ParseTypeTableBody() { Res->setName(TypeName); TypeList[NumRecords] = 0; } else // Otherwise, create a new struct with no body. - Res = StructType::createNamed(Context, TypeName); + Res = StructType::create(Context, TypeName); TypeName.clear(); ResultTy = Res; break; @@ -793,7 +831,7 @@ RestartScan: break; case bitc::TYPE_CODE_OPAQUE: // OPAQUE if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0) - ResultTy = StructType::createNamed(Context, ""); + ResultTy = StructType::create(Context); break; case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD if (NextTypeID >= TypeList.size()) break; @@ -804,7 +842,7 @@ RestartScan: // Set a type. if (TypeList[NextTypeID] == 0) - TypeList[NextTypeID] = StructType::createNamed(Context, ""); + TypeList[NextTypeID] = StructType::create(Context); std::vector<Type*> EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { @@ -923,7 +961,7 @@ bool BitcodeReader::ParseOldTypeSymbolTable() { // Only apply the type name to a struct type with no name. if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID])) - if (!STy->isAnonymous() && !STy->hasName()) + if (!STy->isLiteral() && !STy->hasName()) STy->setName(TypeName); TypeName.clear(); break; @@ -1063,7 +1101,7 @@ bool BitcodeReader::ParseMetadata() { unsigned Size = Record.size(); SmallVector<Value*, 8> Elts; for (unsigned i = 0; i != Size; i += 2) { - const Type *Ty = getTypeByID(Record[i]); + Type *Ty = getTypeByID(Record[i]); if (!Ty) return Error("Invalid METADATA_NODE record"); if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); @@ -1163,7 +1201,7 @@ bool BitcodeReader::ParseConstants() { SmallVector<uint64_t, 64> Record; // Read all the records for this value table. 
- const Type *CurTy = Type::getInt32Ty(Context); + Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (1) { unsigned Code = Stream.ReadCode(); @@ -1218,7 +1256,7 @@ bool BitcodeReader::ParseConstants() { Words[i] = DecodeSignRotatedValue(Record[i]); V = ConstantInt::get(Context, APInt(cast<IntegerType>(CurTy)->getBitWidth(), - NumWords, &Words[0])); + Words)); break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] @@ -1233,11 +1271,11 @@ bool BitcodeReader::ParseConstants() { uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; - V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange))); + V = ConstantFP::get(Context, APFloat(APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true)); + V = ConstantFP::get(Context, APFloat(APInt(128, Record), true)); else if (CurTy->isPPC_FP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]))); + V = ConstantFP::get(Context, APFloat(APInt(128, Record))); else V = UndefValue::get(CurTy); break; @@ -1250,18 +1288,18 @@ bool BitcodeReader::ParseConstants() { unsigned Size = Record.size(); std::vector<Constant*> Elts; - if (const StructType *STy = dyn_cast<StructType>(CurTy)) { + if (StructType *STy = dyn_cast<StructType>(CurTy)) { for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) { - const Type *EltTy = ATy->getElementType(); + } else if (ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) { + Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantArray::get(ATy, Elts); - } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) { - const Type *EltTy = VTy->getElementType(); + } else if (VectorType *VTy = dyn_cast<VectorType>(CurTy)) { + Type *EltTy = VTy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantVector::get(Elts); @@ -1274,8 +1312,8 @@ bool BitcodeReader::ParseConstants() { if (Record.empty()) return Error("Invalid CST_AGGREGATE record"); - const ArrayType *ATy = cast<ArrayType>(CurTy); - const Type *EltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(CurTy); + Type *EltTy = ATy->getElementType(); unsigned Size = Record.size(); std::vector<Constant*> Elts; @@ -1288,8 +1326,8 @@ bool BitcodeReader::ParseConstants() { if (Record.empty()) return Error("Invalid CST_AGGREGATE record"); - const ArrayType *ATy = cast<ArrayType>(CurTy); - const Type *EltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(CurTy); + Type *EltTy = ATy->getElementType(); unsigned Size = Record.size(); std::vector<Constant*> Elts; @@ -1335,7 +1373,7 @@ bool BitcodeReader::ParseConstants() { if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. 
} else { - const Type *OpTy = getTypeByID(Record[1]); + Type *OpTy = getTypeByID(Record[1]); if (!OpTy) return Error("Invalid CE_CAST record"); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); V = ConstantExpr::getCast(Opc, Op, CurTy); @@ -1347,16 +1385,14 @@ bool BitcodeReader::ParseConstants() { if (Record.size() & 1) return Error("Invalid CE_GEP record"); SmallVector<Constant*, 16> Elts; for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - const Type *ElTy = getTypeByID(Record[i]); + Type *ElTy = getTypeByID(Record[i]); if (!ElTy) return Error("Invalid CE_GEP record"); Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); } - if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) - V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1], - Elts.size()-1); - else - V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], - Elts.size()-1); + ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); + V = ConstantExpr::getGetElementPtr(Elts[0], Indices, + BitCode == + bitc::CST_CODE_CE_INBOUNDS_GEP); break; } case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] @@ -1368,7 +1404,7 @@ bool BitcodeReader::ParseConstants() { break; case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); - const VectorType *OpTy = + VectorType *OpTy = dyn_cast_or_null<VectorType>(getTypeByID(Record[0])); if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); @@ -1377,7 +1413,7 @@ bool BitcodeReader::ParseConstants() { break; } case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval] - const VectorType *OpTy = dyn_cast<VectorType>(CurTy); + VectorType *OpTy = dyn_cast<VectorType>(CurTy); if (Record.size() < 3 || OpTy == 0) return Error("Invalid CE_INSERTELT record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); @@ -1388,26 +1424,26 @@ bool BitcodeReader::ParseConstants() { break; } case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] - const VectorType *OpTy = dyn_cast<VectorType>(CurTy); + VectorType *OpTy = dyn_cast<VectorType>(CurTy); if (Record.size() < 3 || OpTy == 0) return Error("Invalid CE_SHUFFLEVEC record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); - const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), OpTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] - const VectorType *RTy = dyn_cast<VectorType>(CurTy); - const VectorType *OpTy = + VectorType *RTy = dyn_cast<VectorType>(CurTy); + VectorType *OpTy = dyn_cast_or_null<VectorType>(getTypeByID(Record[0])); if (Record.size() < 4 || RTy == 0 || OpTy == 0) return Error("Invalid CE_SHUFVEC_EX record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), RTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); @@ -1415,7 +1451,7 @@ bool BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_CMP: { // CE_CMP: 
[opty, opval, opval, pred] if (Record.size() < 4) return Error("Invalid CE_CMP record"); - const Type *OpTy = getTypeByID(Record[0]); + Type *OpTy = getTypeByID(Record[0]); if (OpTy == 0) return Error("Invalid CE_CMP record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); @@ -1442,14 +1478,14 @@ bool BitcodeReader::ParseConstants() { AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; - const PointerType *PTy = cast<PointerType>(CurTy); + PointerType *PTy = cast<PointerType>(CurTy); V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); - const Type *FnTy = getTypeByID(Record[0]); + Type *FnTy = getTypeByID(Record[0]); if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record"); Function *Fn = dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy)); @@ -1662,7 +1698,7 @@ bool BitcodeReader::ParseModule() { case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) return Error("Invalid MODULE_CODE_GLOBALVAR record"); - const Type *Ty = getTypeByID(Record[0]); + Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record"); if (!Ty->isPointerTy()) return Error("Global not a pointer type!"); @@ -1711,11 +1747,11 @@ bool BitcodeReader::ParseModule() { case bitc::MODULE_CODE_FUNCTION: { if (Record.size() < 8) return Error("Invalid MODULE_CODE_FUNCTION record"); - const Type *Ty = getTypeByID(Record[0]); + Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record"); if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); - const FunctionType *FTy = + FunctionType *FTy = dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType()); if (!FTy) return Error("Function not a pointer to function type!"); @@ -1757,7 +1793,7 @@ bool BitcodeReader::ParseModule() { case bitc::MODULE_CODE_ALIAS: { if (Record.size() < 3) return Error("Invalid MODULE_ALIAS record"); - const Type *Ty = getTypeByID(Record[0]); + Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid MODULE_ALIAS record"); if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); @@ -1823,9 +1859,9 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { if (Code != bitc::ENTER_SUBBLOCK) { - // The ranlib in xcode 4 will align archive members by appending newlines to the - // end of them. If this file size is a multiple of 4 but not 8, we have to read and - // ignore these final 4 bytes :-( + // The ranlib in xcode 4 will align archive members by appending newlines + // to the end of them. 
If this file size is a multiple of 4 but not 8, we + // have to read and ignore these final 4 bytes :-( if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) @@ -2160,7 +2196,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum+2 != Record.size()) return Error("Invalid CAST record"); - const Type *ResTy = getTypeByID(Record[OpNum]); + Type *ResTy = getTypeByID(Record[OpNum]); int Opc = GetDecodedCastOpcode(Record[OpNum+1]); if (Opc == -1 || ResTy == 0) return Error("Invalid CAST record"); @@ -2183,7 +2219,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { GEPIdx.push_back(Op); } - I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end()); + I = GetElementPtrInst::Create(BasePtr, GEPIdx); InstructionList.push_back(I); if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP) cast<GetElementPtrInst>(I)->setIsInBounds(true); @@ -2261,8 +2297,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Invalid SELECT record"); // select condition can be either i1 or [N x i1] - if (const VectorType* vector_type = - dyn_cast<const VectorType>(Cond->getType())) { + if (VectorType* vector_type = + dyn_cast<VectorType>(Cond->getType())) { // expect <n x i1> if (vector_type->getElementType() != Type::getInt1Ty(Context)) return Error("Invalid SELECT condition type"); @@ -2381,7 +2417,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] if (Record.size() < 3 || (Record.size() & 1) == 0) return Error("Invalid SWITCH record"); - const Type *OpTy = getTypeByID(Record[0]); + Type *OpTy = getTypeByID(Record[0]); Value *Cond = getFnValueByID(Record[1], OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (OpTy == 0 || Cond == 0 || Default == 0) @@ -2405,7 +2441,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) return Error("Invalid INDIRECTBR record"); - const Type *OpTy = getTypeByID(Record[0]); + Type *OpTy = getTypeByID(Record[0]); Value *Address = getFnValueByID(Record[1], OpTy); if (OpTy == 0 || Address == 0) return Error("Invalid INDIRECTBR record"); @@ -2437,8 +2473,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return Error("Invalid INVOKE record"); - const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType()); - const FunctionType *FTy = !CalleeTy ? 0 : + PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType()); + FunctionType *FTy = !CalleeTy ? 0 : dyn_cast<FunctionType>(CalleeTy->getElementType()); // Check that the right number of fixed parameters are here. @@ -2472,6 +2508,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { cast<InvokeInst>(I)->setAttributes(PAL); break; } + case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] + unsigned Idx = 0; + Value *Val = 0; + if (getValueTypePair(Record, Idx, NextValueNo, Val)) + return Error("Invalid RESUME record"); + I = ResumeInst::Create(Val); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_UNWIND: // UNWIND I = new UnwindInst(Context); InstructionList.push_back(I); @@ -2483,7 +2528,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] 
if (Record.size() < 1 || ((Record.size()-1)&1)) return Error("Invalid PHI record"); - const Type *Ty = getTypeByID(Record[0]); + Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid PHI record"); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); @@ -2499,12 +2544,51 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } + case bitc::FUNC_CODE_INST_LANDINGPAD: { + // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] + unsigned Idx = 0; + if (Record.size() < 4) + return Error("Invalid LANDINGPAD record"); + Type *Ty = getTypeByID(Record[Idx++]); + if (!Ty) return Error("Invalid LANDINGPAD record"); + Value *PersFn = 0; + if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) + return Error("Invalid LANDINGPAD record"); + + bool IsCleanup = !!Record[Idx++]; + unsigned NumClauses = Record[Idx++]; + LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, NumClauses); + LP->setCleanup(IsCleanup); + for (unsigned J = 0; J != NumClauses; ++J) { + LandingPadInst::ClauseType CT = + LandingPadInst::ClauseType(Record[Idx++]); (void)CT; + Value *Val; + + if (getValueTypePair(Record, Idx, NextValueNo, Val)) { + delete LP; + return Error("Invalid LANDINGPAD record"); + } + + assert((CT != LandingPadInst::Catch || + !isa<ArrayType>(Val->getType())) && + "Catch clause has a invalid type!"); + assert((CT != LandingPadInst::Filter || + isa<ArrayType>(Val->getType())) && + "Filter clause has invalid type!"); + LP->addClause(Val); + } + + I = LP; + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) return Error("Invalid ALLOCA record"); - const PointerType *Ty = + PointerType *Ty = dyn_cast_or_null<PointerType>(getTypeByID(Record[0])); - const Type *OpTy = getTypeByID(Record[1]); + Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); unsigned Align = Record[3]; if (!Ty || !Size) return Error("Invalid ALLOCA record"); @@ -2523,6 +2607,28 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_LOADATOMIC: { + // LOADATOMIC: [opty, op, align, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum+4 != Record.size()) + return Error("Invalid LOADATOMIC record"); + + + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); + if (Ordering == NotAtomic || Ordering == Release || + Ordering == AcquireRelease) + return Error("Invalid LOADATOMIC record"); + if (Ordering != NotAtomic && Record[OpNum] == 0) + return Error("Invalid LOADATOMIC record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); + + I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1, + Ordering, SynchScope); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; @@ -2536,6 +2642,83 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_STOREATOMIC: { + // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Val, *Ptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast<PointerType>(Ptr->getType())->getElementType(), Val) || + OpNum+4 != Record.size()) + return Error("Invalid STOREATOMIC record"); + + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); + if 
(Ordering == NotAtomic || Ordering == Acquire || + Ordering == AcquireRelease) + return Error("Invalid STOREATOMIC record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); + if (Ordering != NotAtomic && Record[OpNum] == 0) + return Error("Invalid STOREATOMIC record"); + + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1, + Ordering, SynchScope); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CMPXCHG: { + // CMPXCHG:[ptrty, ptr, cmp, new, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Ptr, *Cmp, *New; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast<PointerType>(Ptr->getType())->getElementType(), Cmp) || + getValue(Record, OpNum, + cast<PointerType>(Ptr->getType())->getElementType(), New) || + OpNum+3 != Record.size()) + return Error("Invalid CMPXCHG record"); + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]); + if (Ordering == NotAtomic || Ordering == Unordered) + return Error("Invalid CMPXCHG record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]); + I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope); + cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_ATOMICRMW: { + // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Ptr, *Val; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast<PointerType>(Ptr->getType())->getElementType(), Val) || + OpNum+4 != Record.size()) + return Error("Invalid ATOMICRMW record"); + AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]); + if (Operation < AtomicRMWInst::FIRST_BINOP || + Operation > AtomicRMWInst::LAST_BINOP) + return Error("Invalid ATOMICRMW record"); + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); + if (Ordering == NotAtomic || Ordering == Unordered) + return Error("Invalid ATOMICRMW record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); + I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); + cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] + if (2 != Record.size()) + return Error("Invalid FENCE record"); + AtomicOrdering Ordering = GetDecodedOrdering(Record[0]); + if (Ordering == NotAtomic || Ordering == Unordered || + Ordering == Monotonic) + return Error("Invalid FENCE record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]); + I = new FenceInst(Context, Ordering, SynchScope); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] 
if (Record.size() < 3) @@ -2549,8 +2732,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return Error("Invalid CALL record"); - const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType()); - const FunctionType *FTy = 0; + PointerType *OpTy = dyn_cast<PointerType>(Callee->getType()); + FunctionType *FTy = 0; if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType()); if (!FTy || Record.size() < FTy->getNumParams()+OpNum) return Error("Invalid CALL record"); @@ -2589,9 +2772,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) return Error("Invalid VAARG record"); - const Type *OpTy = getTypeByID(Record[0]); + Type *OpTy = getTypeByID(Record[0]); Value *Op = getFnValueByID(Record[1], OpTy); - const Type *ResTy = getTypeByID(Record[2]); + Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return Error("Invalid VAARG record"); I = new VAArgInst(Op, ResTy); @@ -2756,6 +2939,9 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { } std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics); + // Upgrade to new EH scheme. N.B. This will go away in 3.1. + UpgradeExceptionHandling(M); + // Check debug info intrinsics. CheckDebugInfoIntrinsics(TheModule); diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h index 1b3bf1a..6e6118c 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h @@ -76,8 +76,8 @@ public: ValuePtrs.resize(N); } - Constant *getConstantFwdRef(unsigned Idx, const Type *Ty); - Value *getValueFwdRef(unsigned Idx, const Type *Ty); + Constant *getConstantFwdRef(unsigned Idx, Type *Ty); + Value *getValueFwdRef(unsigned Idx, Type *Ty); void AssignValue(Value *V, unsigned Idx); @@ -212,7 +212,7 @@ public: private: Type *getTypeByID(unsigned ID); Type *getTypeByIDOrNull(unsigned ID); - Value *getFnValueByID(unsigned ID, const Type *Ty) { + Value *getFnValueByID(unsigned ID, Type *Ty) { if (Ty && Ty->isMetadataTy()) return MDValueList.getValueFwdRef(ID); return ValueList.getValueFwdRef(ID, Ty); @@ -248,7 +248,7 @@ private: return ResVal == 0; } bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot, - const Type *Ty, Value *&ResVal) { + Type *Ty, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, Ty); diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 85d67ce..5b3d969 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -58,7 +58,6 @@ enum { FUNCTION_INST_UNREACHABLE_ABBREV }; - static unsigned GetEncodedCastOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown cast instruction!"); @@ -101,6 +100,44 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { } } +static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) { + switch (Op) { + default: llvm_unreachable("Unknown RMW operation!"); + case AtomicRMWInst::Xchg: return bitc::RMW_XCHG; + case AtomicRMWInst::Add: return bitc::RMW_ADD; + case AtomicRMWInst::Sub: return bitc::RMW_SUB; + case AtomicRMWInst::And: return bitc::RMW_AND; + case AtomicRMWInst::Nand: return bitc::RMW_NAND; + case AtomicRMWInst::Or: return bitc::RMW_OR; + case 
AtomicRMWInst::Xor: return bitc::RMW_XOR; + case AtomicRMWInst::Max: return bitc::RMW_MAX; + case AtomicRMWInst::Min: return bitc::RMW_MIN; + case AtomicRMWInst::UMax: return bitc::RMW_UMAX; + case AtomicRMWInst::UMin: return bitc::RMW_UMIN; + } +} + +static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { + switch (Ordering) { + default: llvm_unreachable("Unknown atomic ordering"); + case NotAtomic: return bitc::ORDERING_NOTATOMIC; + case Unordered: return bitc::ORDERING_UNORDERED; + case Monotonic: return bitc::ORDERING_MONOTONIC; + case Acquire: return bitc::ORDERING_ACQUIRE; + case Release: return bitc::ORDERING_RELEASE; + case AcquireRelease: return bitc::ORDERING_ACQREL; + case SequentiallyConsistent: return bitc::ORDERING_SEQCST; + } +} + +static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) { + switch (SynchScope) { + default: llvm_unreachable("Unknown synchronization scope"); + case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; + case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; + } +} + static void WriteStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse, BitstreamWriter &Stream) { SmallVector<unsigned, 64> Vals; @@ -199,7 +236,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); - // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); @@ -216,7 +252,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { // Loop over all of the types, emitting each in turn. for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { - const Type *T = TypeList[i]; + Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; @@ -237,7 +273,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { TypeVals.push_back(cast<IntegerType>(T)->getBitWidth()); break; case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(T); + PointerType *PTy = cast<PointerType>(T); // POINTER: [pointee type, address space] Code = bitc::TYPE_CODE_POINTER; TypeVals.push_back(VE.getTypeID(PTy->getElementType())); @@ -247,7 +283,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { break; } case Type::FunctionTyID: { - const FunctionType *FT = cast<FunctionType>(T); + FunctionType *FT = cast<FunctionType>(T); // FUNCTION: [isvararg, attrid, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); @@ -259,7 +295,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { break; } case Type::StructTyID: { - const StructType *ST = cast<StructType>(T); + StructType *ST = cast<StructType>(T); // STRUCT: [ispacked, eltty x N] TypeVals.push_back(ST->isPacked()); // Output all of the element types. 
@@ -267,7 +303,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); - if (ST->isAnonymous()) { + if (ST->isLiteral()) { Code = bitc::TYPE_CODE_STRUCT_ANON; AbbrevToUse = StructAnonAbbrev; } else { @@ -286,7 +322,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { break; } case Type::ArrayTyID: { - const ArrayType *AT = cast<ArrayType>(T); + ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] Code = bitc::TYPE_CODE_ARRAY; TypeVals.push_back(AT->getNumElements()); @@ -295,7 +331,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { break; } case Type::VectorTyID: { - const VectorType *VT = cast<VectorType>(T); + VectorType *VT = cast<VectorType>(T); // VECTOR [numelts, eltty] Code = bitc::TYPE_CODE_VECTOR; TypeVals.push_back(VT->getNumElements()); @@ -372,14 +408,15 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, GV != E; ++GV) { MaxAlignment = std::max(MaxAlignment, GV->getAlignment()); MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType())); - - if (!GV->hasSection()) continue; - // Give section names unique ID's. - unsigned &Entry = SectionMap[GV->getSection()]; - if (Entry != 0) continue; - WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(), - 0/*TODO*/, Stream); - Entry = SectionMap.size(); + if (GV->hasSection()) { + // Give section names unique ID's. + unsigned &Entry = SectionMap[GV->getSection()]; + if (!Entry) { + WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(), + 0/*TODO*/, Stream); + Entry = SectionMap.size(); + } + } } for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { MaxAlignment = std::max(MaxAlignment, F->getAlignment()); @@ -716,7 +753,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, SmallVector<uint64_t, 64> Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); - const Type *LastTy = 0; + Type *LastTy = 0; for (unsigned i = FirstVal; i != LastVal; ++i) { const Value *V = Vals[i].first; // If we need to switch types, do so now. 
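The restructured section-name handling in WriteModuleInfo above leans on a common idiom: the map's operator[] value-initializes a fresh entry to 0, so 0 doubles as "section name not yet emitted" and the IDs handed out end up 1-based. A self-contained illustration of the idiom, with std::map standing in for the real map type (assumed simplification, not the LLVM code):

#include <cassert>
#include <map>
#include <string>

static unsigned getOrAssignSectionID(std::map<std::string, unsigned> &SectionMap,
                                     const std::string &Name) {
  unsigned &Entry = SectionMap[Name];   // value-initialized to 0 on first use
  if (!Entry)
    Entry = SectionMap.size();          // the key is already inserted, so IDs are 1-based
  return Entry;
}

int main() {
  std::map<std::string, unsigned> SectionMap;
  assert(getOrAssignSectionID(SectionMap, ".text") == 1);
  assert(getOrAssignSectionID(SectionMap, ".data") == 2);
  assert(getOrAssignSectionID(SectionMap, ".text") == 1); // existing ID is kept
  return 0;
}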
@@ -781,7 +818,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { Code = bitc::CST_CODE_FLOAT; - const Type *Ty = CFP->getType(); + Type *Ty = CFP->getType(); if (Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (Ty->isX86_FP80Ty()) { @@ -1083,8 +1120,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Invoke: { const InvokeInst *II = cast<InvokeInst>(&I); const Value *Callee(II->getCalledValue()); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeID(II->getAttributes())); @@ -1105,6 +1142,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, } break; } + case Instruction::Resume: + Code = bitc::FUNC_CODE_INST_RESUME; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); + break; case Instruction::Unwind: Code = bitc::FUNC_CODE_INST_UNWIND; break; @@ -1124,6 +1165,23 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; } + case Instruction::LandingPad: { + const LandingPadInst &LP = cast<LandingPadInst>(I); + Code = bitc::FUNC_CODE_INST_LANDINGPAD; + Vals.push_back(VE.getTypeID(LP.getType())); + PushValueAndType(LP.getPersonalityFn(), InstID, Vals, VE); + Vals.push_back(LP.isCleanup()); + Vals.push_back(LP.getNumClauses()); + for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { + if (LP.isCatch(I)) + Vals.push_back(LandingPadInst::Catch); + else + Vals.push_back(LandingPadInst::Filter); + PushValueAndType(LP.getClause(I), InstID, Vals, VE); + } + break; + } + case Instruction::Alloca: Code = bitc::FUNC_CODE_INST_ALLOCA; Vals.push_back(VE.getTypeID(I.getType())); @@ -1133,24 +1191,66 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; case Instruction::Load: - Code = bitc::FUNC_CODE_INST_LOAD; - if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr - AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; - + if (cast<LoadInst>(I).isAtomic()) { + Code = bitc::FUNC_CODE_INST_LOADATOMIC; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); + } else { + Code = bitc::FUNC_CODE_INST_LOAD; + if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr + AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; + } Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1); Vals.push_back(cast<LoadInst>(I).isVolatile()); + if (cast<LoadInst>(I).isAtomic()) { + Vals.push_back(GetEncodedOrdering(cast<LoadInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<LoadInst>(I).getSynchScope())); + } break; case Instruction::Store: - Code = bitc::FUNC_CODE_INST_STORE; + if (cast<StoreInst>(I).isAtomic()) + Code = bitc::FUNC_CODE_INST_STOREATOMIC; + else + Code = bitc::FUNC_CODE_INST_STORE; PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr Vals.push_back(VE.getValueID(I.getOperand(0))); // val. 
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1); Vals.push_back(cast<StoreInst>(I).isVolatile()); + if (cast<StoreInst>(I).isAtomic()) { + Vals.push_back(GetEncodedOrdering(cast<StoreInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<StoreInst>(I).getSynchScope())); + } + break; + case Instruction::AtomicCmpXchg: + Code = bitc::FUNC_CODE_INST_CMPXCHG; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr + Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp. + Vals.push_back(VE.getValueID(I.getOperand(2))); // newval. + Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile()); + Vals.push_back(GetEncodedOrdering( + cast<AtomicCmpXchgInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast<AtomicCmpXchgInst>(I).getSynchScope())); + break; + case Instruction::AtomicRMW: + Code = bitc::FUNC_CODE_INST_ATOMICRMW; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr + Vals.push_back(VE.getValueID(I.getOperand(1))); // val. + Vals.push_back(GetEncodedRMWOperation( + cast<AtomicRMWInst>(I).getOperation())); + Vals.push_back(cast<AtomicRMWInst>(I).isVolatile()); + Vals.push_back(GetEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast<AtomicRMWInst>(I).getSynchScope())); + break; + case Instruction::Fence: + Code = bitc::FUNC_CODE_INST_FENCE; + Vals.push_back(GetEncodedOrdering(cast<FenceInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<FenceInst>(I).getSynchScope())); break; case Instruction::Call: { const CallInst &CI = cast<CallInst>(I); - const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_CALL; diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index b68bf92..9ae9905 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -315,7 +315,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) { } -void ValueEnumerator::EnumerateType(const Type *Ty) { +void ValueEnumerator::EnumerateType(Type *Ty) { unsigned *TypeID = &TypeMap[Ty]; // We've already seen this type. @@ -325,8 +325,8 @@ void ValueEnumerator::EnumerateType(const Type *Ty) { // If it is a non-anonymous struct, mark the type as being visited so that we // don't recursively visit it. This is safe because we allow forward // references of these in the bitcode reader. - if (const StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isAnonymous()) + if (StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isLiteral()) *TypeID = ~0U; // Enumerate all of the subtypes before we enumerate this type. This ensures diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h index 6617b60..b6fc920 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -35,12 +35,12 @@ class MDSymbolTable; class ValueEnumerator { public: - typedef std::vector<const Type*> TypeList; + typedef std::vector<Type*> TypeList; // For each value, we remember its Value* and occurrence frequency. 
typedef std::vector<std::pair<const Value*, unsigned> > ValueList; private: - typedef DenseMap<const Type*, unsigned> TypeMapType; + typedef DenseMap<Type*, unsigned> TypeMapType; TypeMapType TypeMap; TypeList Types; @@ -85,7 +85,7 @@ public: unsigned getValueID(const Value *V) const; - unsigned getTypeID(const Type *T) const { + unsigned getTypeID(Type *T) const { TypeMapType::const_iterator I = TypeMap.find(T); assert(I != TypeMap.end() && "Type not in ValueEnumerator!"); return I->second-1; @@ -140,7 +140,7 @@ private: void EnumerateFunctionLocalMetadata(const MDNode *N); void EnumerateNamedMDNode(const NamedMDNode *NMD); void EnumerateValue(const Value *V); - void EnumerateType(const Type *T); + void EnumerateType(Type *T); void EnumerateOperandType(const Value *V); void EnumerateAttributes(const AttrListPtr &PAL); diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 125e641..fafc010 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -31,7 +31,7 @@ using namespace llvm; /// of insertvalue or extractvalue indices that identify a member, return /// the linearized index of the start of the member. /// -unsigned llvm::ComputeLinearIndex(const Type *Ty, +unsigned llvm::ComputeLinearIndex(Type *Ty, const unsigned *Indices, const unsigned *IndicesEnd, unsigned CurIndex) { @@ -40,7 +40,7 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty, return CurIndex; // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); @@ -52,8 +52,8 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty, return CurIndex; } // Given an array type, recursively traverse the elements. - else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); + else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); @@ -72,12 +72,12 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty, /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory offsets of each of the individual values. /// -void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, +void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, @@ -88,8 +88,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, return; } // Given an array type, recursively traverse the elements. 
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 5861fa4..3f23873 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -27,7 +26,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7f314ee..1999f36 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,7 +33,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -45,6 +44,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -290,10 +290,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + unsigned Align = 1 << AlignLog; // Handle common symbols. if (GVKind.isCommon()) { - unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; @@ -307,17 +307,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { const MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align); return; } - if (MAI->hasLCOMMDirective()) { + if (MAI->getLCOMMDirectiveType() != LCOMM::None && + (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) { // .lcomm _foo, 42 - OutStreamer.EmitLocalCommonSymbol(GVSym, Size); + OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); return; } - unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; @@ -474,8 +474,10 @@ void AsmPrinter::EmitFunctionHeader() { void AsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. 
- if (CurrentFnSym->isUndefined()) + if (CurrentFnSym->isUndefined()) { + OutStreamer.ForceCodeRegion(); return OutStreamer.EmitLabel(CurrentFnSym); + } report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); @@ -620,6 +622,9 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; + if (MMI->getCompactUnwindEncoding() != 0) + OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); + MachineModuleInfo &MMI = MF->getMMI(); std::vector<MachineMove> &Moves = MMI.getFrameMoves(); bool FoundOne = false; @@ -878,7 +883,7 @@ bool AsmPrinter::doFinalization(Module &M) { I != E; ++I) { MCSymbol *Name = Mang->getSymbol(I); - const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal()); + const GlobalValue *GV = I->getAliasedGlobal(); MCSymbol *Target = Mang->getSymbol(GV); if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) @@ -1009,7 +1014,7 @@ void AsmPrinter::EmitConstantPool() { unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/); - const Type *Ty = CPE.getType(); + Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); OutStreamer.EmitLabel(GetCPISymbol(CPI)); @@ -1055,6 +1060,15 @@ void AsmPrinter::EmitJumpTableInfo() { EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); + // If we know the form of the jump table, go ahead and tag it as such. + if (!JTInDiffSection) { + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) { + OutStreamer.EmitJumpTable32Region(); + } else { + OutStreamer.EmitDataRegion(); + } + } + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1226,22 +1240,53 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) { } } -/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the -/// function pointers, ignoring the init priority. +typedef std::pair<int, Constant*> Structor; + +static bool priority_order(const Structor& lhs, const Structor& rhs) { + return lhs.first < rhs.first; +} + +/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init +/// priority. void AsmPrinter::EmitXXStructorList(const Constant *List) { // Should be an array of '{ int, void ()* }' structs. The first value is the - // init priority, which we ignore. + // init priority. if (!isa<ConstantArray>(List)) return; - const ConstantArray *InitList = cast<ConstantArray>(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1)); - } + + // Sanity check the structors list. + const ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (!InitList) return; // Not an array! + StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); + if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs! + if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) || + !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). + + // Gather the structors in a form that's convenient for sorting by priority. 
+ SmallVector<Structor, 8> Structors; + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i)); + if (!CS) continue; // Malformed. + if (CS->getOperand(1)->isNullValue()) + break; // Found a null terminator, skip the rest. + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!Priority) continue; // Malformed. + Structors.push_back(std::make_pair(Priority->getLimitedValue(65535), + CS->getOperand(1))); + } + + // Emit the function pointers in reverse priority order. + switch (MAI->getStructorOutputOrder()) { + case Structors::None: + break; + case Structors::PriorityOrder: + std::sort(Structors.begin(), Structors.end(), priority_order); + break; + case Structors::ReversePriorityOrder: + std::sort(Structors.rbegin(), Structors.rend(), priority_order); + break; + } + for (unsigned i = 0, e = Structors.size(); i != e; ++i) + EmitGlobalConstant(Structors[i].second); } //===--------------------------------------------------------------------===// @@ -1406,8 +1451,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { // Generate a symbolic expression for the byte address const Constant *PtrVal = CE->getOperand(0); SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0], - IdxVec.size()); + int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); if (Offset == 0) @@ -1447,7 +1491,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); - const Type *Ty = CE->getType(); + Type *Ty = CE->getType(); const MCExpr *OpExpr = LowerConstant(Op, AP); @@ -1496,12 +1540,67 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, AsmPrinter &AP); +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) return -1; + + uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType()); + uint64_t Value = CI->getZExtValue(); + + // Make sure the constant is at least 8 bits long and has a power + // of 2 bit width. This guarantees the constant bit width is + // always a multiple of 8 bits, avoiding issues with padding out + // to Size and other such corner cases. + if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1; + + uint8_t Byte = static_cast<uint8_t>(Value); + + for (unsigned i = 1; i < Size; ++i) { + Value >>= 8; + if (static_cast<uint8_t>(Value) != Byte) return -1; + } + return Byte; + } + if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + // Make sure all array elements are sequences of the same repeated + // byte. 
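The EmitXXStructorList rewrite above collects (priority, pointer) pairs and sorts them with a plain "compare by first element" predicate; the reverse-priority case runs the same sort over rbegin()/rend(). A toy standalone version of that sorting step, with strings standing in for the emitted function pointers (illustrative only, not the AsmPrinter API):

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

typedef std::pair<int, std::string> Structor;   // (init priority, ctor name)

static bool priority_order(const Structor &L, const Structor &R) {
  return L.first < R.first;
}

int main() {
  std::vector<Structor> Structors;
  Structors.push_back(std::make_pair(65535, "default_priority_ctor"));
  Structors.push_back(std::make_pair(101,   "early_ctor"));
  Structors.push_back(std::make_pair(200,   "later_ctor"));

  // Ascending priority; a reverse-priority output would sort the reverse
  // iterators (rbegin()/rend()) with the same predicate instead.
  std::sort(Structors.begin(), Structors.end(), priority_order);

  for (size_t i = 0; i != Structors.size(); ++i)
    std::printf("%d %s\n", Structors[i].first, Structors[i].second.c_str());
  return 0;
}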
+ if (CA->getNumOperands() == 0) return -1; + + int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); + if (Byte == -1) return -1; + + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { + int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM); + if (ThisByte == -1) return -1; + if (Byte != ThisByte) return -1; + } + return Byte; + } + + return -1; +} + static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { if (AddrSpace != 0 || !CA->isString()) { - // Not a string. Print the values in successive locations - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + // Not a string. Print the values in successive locations. + + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, AP.TM); + + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } return; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index dd5b0e2..4d6c281 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 5ac455e..8eda889 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -23,15 +23,15 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -49,7 +49,7 @@ namespace { static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); assert(DiagInfo && "Diagnostic context not passed down?"); - + // If the inline asm had metadata associated with it, pull out a location // cookie corresponding to which line the error occurred on. 
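isRepeatedByteSequence above is what lets EmitGlobalConstantArray collapse an array of identical bytes into a single EmitFill instead of emitting one record per element. A stripped-down standalone version of the scalar case (the real code also recurses over ConstantArray elements and requires a power-of-two bit width of at least 8):

#include <cassert>
#include <cstdint>

// Return the repeated byte value if all SizeInBytes bytes of Value are equal,
// otherwise -1.
static int isRepeatedByteSequence(uint64_t Value, unsigned SizeInBytes) {
  uint8_t Byte = static_cast<uint8_t>(Value);
  for (unsigned i = 1; i < SizeInBytes; ++i) {
    Value >>= 8;
    if (static_cast<uint8_t>(Value) != Byte)
      return -1;
  }
  return Byte;
}

int main() {
  assert(isRepeatedByteSequence(0x0000000000000000ULL, 8) == 0x00);
  assert(isRepeatedByteSequence(0xCCCCCCCCCCCCCCCCULL, 8) == 0xCC);
  assert(isRepeatedByteSequence(0x00000000000000FFULL, 8) == -1);
  return 0;
}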
unsigned LocCookie = 0; @@ -57,13 +57,13 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { unsigned ErrorLine = Diag.getLineNo()-1; if (ErrorLine >= LocInfo->getNumOperands()) ErrorLine = 0; - + if (LocInfo->getNumOperands() != 0) if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) LocCookie = CI->getZExtValue(); } - + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); } @@ -109,7 +109,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, + OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); @@ -121,7 +121,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); - OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser)); + OwningPtr<MCTargetAsmParser> + TAP(TM.getTarget().createMCAsmParser(*STI, *Parser)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 21396ca..9c1ce76 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -69,7 +69,7 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const { // Emit attribute type. // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(AttrData.getAttribute(), - dwarf::AttributeString(AttrData.getAttribute())); + dwarf::AttributeString(AttrData.getAttribute())); // Emit form type. // FIXME: Doing work even in non-asm-verbose runs. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 91b7d08..8ed4f4c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -17,7 +17,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -77,7 +77,8 @@ void DwarfCFIException::EndModule() { // This is a temporary hack to keep sections in the same order they // were before. This lets us produce bit identical outputs while // transitioning to CFI. 
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + Asm->OutStreamer.SwitchSection( + const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection()); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 1fe035e..88b7524 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -16,7 +16,10 @@ #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -132,8 +135,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), - G.getContext().getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), + G.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -439,27 +442,36 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, addBlock(Die, Attribute, 0, Block); } +/// isTypeSigned - Return true if the type is signed. +static bool isTypeSigned(DIType Ty, int *SizeInBits) { + if (Ty.isDerivedType()) + return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + if (Ty.isBasicType()) + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed + || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + *SizeInBits = Ty.getSizeInBits(); + return true; + } + return false; +} + /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { assert (MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned form = dwarf::DW_FORM_udata; - switch (Ty.getSizeInBits()) { - case 8: form = dwarf::DW_FORM_data1; break; - case 16: form = dwarf::DW_FORM_data2; break; - case 32: form = dwarf::DW_FORM_data4; break; - case 64: form = dwarf::DW_FORM_data8; break; + int SizeInBits = -1; + bool SignedConstant = isTypeSigned(Ty, &SizeInBits); + unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; + switch (SizeInBits) { + case 8: Form = dwarf::DW_FORM_data1; break; + case 16: Form = dwarf::DW_FORM_data2; break; + case 32: Form = dwarf::DW_FORM_data4; break; + case 64: Form = dwarf::DW_FORM_data8; break; default: break; } - - DIBasicType BTy(Ty); - if (BTy.Verify() && - (BTy.getEncoding() == dwarf::DW_ATE_signed - || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) - addSInt(Block, 0, form, MO.getImm()); - else - addUInt(Block, 0, form, MO.getImm()); + SignedConstant ? 
addSInt(Block, 0, Form, MO.getImm()) + : addUInt(Block, 0, Form, MO.getImm()); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); return true; @@ -555,7 +567,7 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); } else if (Context.isSubprogram()) { - DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context)); ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getDIE(Context)) ContextDIE->addChild(Die); @@ -565,7 +577,10 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. -DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { +DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { + DIType Ty(TyNode); + if (!Ty.Verify()) + return NULL; DIE *TyDIE = getDIE(Ty); if (TyDIE) return TyDIE; @@ -617,7 +632,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { void CompileUnit::addGlobalType(DIType Ty) { DIDescriptor Context = Ty.getContext(); if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace())) + && (!Context || Context.isCompileUnit() || Context.isFile() + || Context.isNameSpace())) if (DIEEntry *Entry = getDIEEntry(Ty)) GlobalTypes[Ty.getName()] = Entry->getEntry(); } @@ -642,13 +658,20 @@ void CompileUnit::addPubTypes(DISubprogram SP) { void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - // Add name if not anonymous or intermediate type. if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { + Buffer.setTag(dwarf::DW_TAG_unspecified_type); + // Unspecified types has only name, nothing else. + return; + } + + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + uint64_t Size = BTy.getSizeInBits() >> 3; addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); } @@ -752,7 +775,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *ElemDie = NULL; if (Element.isSubprogram()) { DISubprogram SP(Element); - ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); if (SP.isProtected()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_protected); @@ -880,6 +903,218 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; } +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// printer to not emit usual symbol prefix before the symbol name is used then +/// return linkage name after skipping this special LLVM prefix. +static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +/// getOrCreateSubprogramDIE - Create new DIE using SP. +DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + DIE *SPDie = getDIE(SP); + if (SPDie) + return SPDie; + + SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. 
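The reworked addConstantValue above first walks derived types to recover the signedness and size of the underlying type, then picks a DWARF form: a fixed-width DW_FORM_data1/2/4/8 when the size is 8/16/32/64 bits, otherwise the variable-length sdata or udata form chosen by signedness. A compact sketch of just that form-selection step (string names stand in for the dwarf::DW_FORM_* constants):

#include <cstdio>

static const char *pickConstantForm(int SizeInBits, bool IsSigned) {
  // Default to the variable-length LEB128 forms, chosen by signedness.
  const char *Form = IsSigned ? "DW_FORM_sdata" : "DW_FORM_udata";
  switch (SizeInBits) {
  case 8:  Form = "DW_FORM_data1"; break;
  case 16: Form = "DW_FORM_data2"; break;
  case 32: Form = "DW_FORM_data4"; break;
  case 64: Form = "DW_FORM_data8"; break;
  default: break;
  }
  return Form;
}

int main() {
  std::printf("%s\n", pickConstantForm(32, true));    // DW_FORM_data4
  std::printf("%s\n", pickConstantForm(-1, true));    // unknown size, signed   -> DW_FORM_sdata
  std::printf("%s\n", pickConstantForm(-1, false));   // unknown size, unsigned -> DW_FORM_udata
  return 0;
}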
+ insertDIE(SP, SPDie); + + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. + addTemplateParams(*SPDie, SP.getTemplateParams()); + + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); + + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); + + addSourceLine(SPDie, SP); + + if (SP.isPrototyped()) + addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add Return Type. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) + addType(SPDie, SPTy); + else + addType(SPDie, DIType(Args.getElement(0))); + + unsigned VK = SP.getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = getDIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + ContainingTypeMap.insert(std::make_pair(SPDie, + SP.getContainingType())); + } + + if (!SP.isDefinition()) { + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled while processing variables. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + addType(Arg, ATy); + if (ATy.isArtificial()) + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + } + + if (SP.isArtificial()) + addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + + if (!SP.isLocalToUnit()) + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + if (SP.isOptimized()) + addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + + if (unsigned isa = Asm->getISAEncoding()) { + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } + + return SPDie; +} + +// Return const expression if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return NULL; + + // First operand points to a global struct. + Value *Ptr = CE->getOperand(0); + if (!isa<GlobalValue>(Ptr) || + !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) + return NULL; + + // Second operand is zero. + const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return NULL; + + // Third operand is offset. 
+ if (!isa<ConstantInt>(CE->getOperand(2))) + return NULL; + + return CE; +} + +/// createGlobalVariableDIE - create global variable DIE. +void CompileUnit::createGlobalVariableDIE(const MDNode *N) { + // Check for pre-existence. + if (getDIE(N)) + return; + + DIGlobalVariable GV(N); + if (!GV.Verify()) + return; + + DIE *VariableDIE = new DIE(GV.getTag()); + // Add to map. + insertDIE(N, VariableDIE); + + // Add name. + addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); + StringRef LinkageName = GV.getLinkageName(); + bool isGlobalVariable = GV.getGlobal() != NULL; + if (!LinkageName.empty() && isGlobalVariable) + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); + // Add type. + DIType GTy = GV.getType(); + addType(VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) { + addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + // Expose as global. + addGlobal(GV.getName(), VariableDIE); + } + // Add line number info. + addSourceLine(VariableDIE, GV); + // Add to context owner. + DIDescriptor GVContext = GV.getContext(); + addToContextOwner(VariableDIE, GVContext); + // Add location. + if (isGlobalVariable) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(GV.getGlobal())); + // Do not create specification DIE if context is either compile unit + // or a subprogram. + if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && + !GVContext.isFile() && !isSubprogramContext(GVContext)) { + // Create specification DIE. + DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, + 1); + addDie(VariableSpecDIE); + } else { + addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + } + } else if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(GV.getConstant())) + addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); + else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + // GV is a merged global. + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + Value *Ptr = CE->getOperand(0); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(Ptr))); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); + addUInt(Block, 0, dwarf::DW_FORM_udata, + Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + } + + return; +} + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); @@ -944,6 +1179,128 @@ DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { return Enumerator; } +/// constructContainingTypeDIEs - Construct DIEs for types that contain +/// vtables. 
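For a variable that global merging folded into @_MergedGlobals, createGlobalVariableDIE above emits a location expression of the form DW_OP_addr <merged symbol>, DW_OP_constu <byte offset derived from the GEP indices>, DW_OP_plus. A small illustration of what that expression amounts to; the struct and symbol name are placeholders, not actual compiler output:

#include <cstddef>
#include <cstdio>

// Illustrative stand-in for the { i8, i8, i8, i8 } blob behind @_MergedGlobals.
struct MergedGlobals {
  char a, b, c, d;
};

int main() {
  // For i8* getelementptr(@_MergedGlobals, i32 0, i32 2) the location becomes
  // "address of _MergedGlobals plus a constant byte offset":
  std::printf("DW_OP_addr _MergedGlobals; DW_OP_constu %lu; DW_OP_plus\n",
              (unsigned long)offsetof(MergedGlobals, c));
  return 0;
}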
+void CompileUnit::constructContainingTypeDIEs() { + for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), + CE = ContainingTypeMap.end(); CI != CE; ++CI) { + DIE *SPDie = CI->first; + const MDNode *N = CI->second; + if (!N) continue; + DIE *NDie = getDIE(N); + if (!NDie) continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + } +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { + StringRef Name = DV->getName(); + if (Name.empty()) + return NULL; + + // Translate tag to proper Dwarf tag. + unsigned Tag = DV->getTag(); + + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); + DbgVariable *AbsVar = DV->getAbstractVariable(); + DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL; + if (AbsDIE) + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); + else { + addString(VariableDie, dwarf::DW_AT_name, + dwarf::DW_FORM_string, Name); + addSourceLine(VariableDie, DV->getVariable()); + addType(VariableDie, DV->getType()); + } + + if (DV->isArtificial()) + addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); + + if (isScopeAbstract) { + DV->setDIE(VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV->getDotDebugLocOffset(); + if (Offset != ~0U) { + addLabel(VariableDie, dwarf::DW_AT_location, + dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); + DV->setDIE(VariableDie); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. + if (const MachineInstr *DVInsn = DV->getMInsn()) { + bool updated = false; + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (DVInsn->getOperand(1).isImm() && + TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + MachineLocation Location(FrameReg, Offset); + addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); + updated = true; + } + else if (DVInsn->getOperand(0).isImm()) + updated = + addConstantValue(VariableDie, DVInsn->getOperand(0), + DV->getType()); + else if (DVInsn->getOperand(0).isFPImm()) + updated = + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + updated = + addConstantValue(VariableDie, + DVInsn->getOperand(0).getCImm(), + DV->getType().isUnsignedDIType()); + } else { + addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; + } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + DV->setDIE(VariableDie); + return VariableDie; + } else { + // .. else use frame index. 
+ int FI = DV->getFrameIndex(); + if (FI != ~0) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + addVariableAddress(DV, VariableDie, Location); + } + } + + DV->setDIE(VariableDie); + return VariableDie; +} + /// createMemberDIE - Create new member DIE. DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); @@ -1013,7 +1370,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) + else addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_public); if (DT.isVirtual()) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 213c7fc..7859265 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -67,6 +67,11 @@ class CompileUnit { /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; + /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that + /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. + DenseMap<DIE *, const MDNode *> ContainingTypeMap; + public: CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); ~CompileUnit(); @@ -226,9 +231,12 @@ public: /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); + /// getOrCreateSubprogramDIE - Create new DIE using SP. + DIE *getOrCreateSubprogramDIE(DISubprogram SP); + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); + DIE *getOrCreateTypeDIE(const MDNode *N); /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE /// for the given DITemplateTypeParameter. @@ -242,6 +250,9 @@ public: /// information entry. DIEEntry *createDIEEntry(DIE *Entry); + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(const MDNode *N); + void addPubTypes(DISubprogram SP); /// constructTypeDIE - Construct basic type die from DIBasicType. @@ -266,6 +277,13 @@ public: /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. DIE *constructEnumTypeDIE(DIEnumerator ETy); + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract); + /// createMemberDIE - Create new member DIE. 
DIE *createMemberDIE(DIDerivedType DT); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 125e1e8..1b7e370 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -24,7 +24,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -45,9 +44,6 @@ #include "llvm/Support/Path.h" using namespace llvm; -static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, - cl::desc("Print DbgScope information for each machine instruction")); - static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -69,7 +65,7 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -DIType DbgVariable::getType() const { +DIType DbgVariable::getType() const { DIType Ty = Var.getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. @@ -120,141 +116,12 @@ DIType DbgVariable::getType() const { return Ty; } -//===----------------------------------------------------------------------===// -/// DbgRange - This is used to track range of instructions with identical -/// debug info scope. -/// -typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange; - -//===----------------------------------------------------------------------===// -/// DbgScope - This class is used to track scope information. -/// -class DbgScope { - DbgScope *Parent; // Parent to this scope. - DIDescriptor Desc; // Debug info descriptor for scope. - // Location at which this scope is inlined. - AssertingVH<const MDNode> InlinedAtLocation; - bool AbstractScope; // Abstract Scope - const MachineInstr *LastInsn; // Last instruction of this scope. - const MachineInstr *FirstInsn; // First instruction of this scope. - unsigned DFSIn, DFSOut; - // Scopes defined in scope. Contents not owned. - SmallVector<DbgScope *, 4> Scopes; - // Variables declared in scope. Contents owned. - SmallVector<DbgVariable *, 8> Variables; - SmallVector<DbgRange, 4> Ranges; - // Private state for dump() - mutable unsigned IndentLevel; -public: - DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0) - : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), - LastInsn(0), FirstInsn(0), - DFSIn(0), DFSOut(0), IndentLevel(0) {} - virtual ~DbgScope(); - - // Accessors. - DbgScope *getParent() const { return Parent; } - void setParent(DbgScope *P) { Parent = P; } - DIDescriptor getDesc() const { return Desc; } - const MDNode *getInlinedAt() const { return InlinedAtLocation; } - const MDNode *getScopeNode() const { return Desc; } - const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } - const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; } - const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } - - /// openInsnRange - This scope covers instruction range starting from MI. - void openInsnRange(const MachineInstr *MI) { - if (!FirstInsn) - FirstInsn = MI; - - if (Parent) - Parent->openInsnRange(MI); - } - - /// extendInsnRange - Extend the current instruction range covered by - /// this scope. 
- void extendInsnRange(const MachineInstr *MI) { - assert (FirstInsn && "MI Range is not open!"); - LastInsn = MI; - if (Parent) - Parent->extendInsnRange(MI); - } - - /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected - /// until now. This is used when a new scope is encountered while walking - /// machine instructions. - void closeInsnRange(DbgScope *NewScope = NULL) { - assert (LastInsn && "Last insn missing!"); - Ranges.push_back(DbgRange(FirstInsn, LastInsn)); - FirstInsn = NULL; - LastInsn = NULL; - // If Parent dominates NewScope then do not close Parent's instruction - // range. - if (Parent && (!NewScope || !Parent->dominates(NewScope))) - Parent->closeInsnRange(NewScope); - } - - void setAbstractScope() { AbstractScope = true; } - bool isAbstractScope() const { return AbstractScope; } - - // Depth First Search support to walk and mainpluate DbgScope hierarchy. - unsigned getDFSOut() const { return DFSOut; } - void setDFSOut(unsigned O) { DFSOut = O; } - unsigned getDFSIn() const { return DFSIn; } - void setDFSIn(unsigned I) { DFSIn = I; } - bool dominates(const DbgScope *S) { - if (S == this) - return true; - if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut()) - return true; - return false; - } - - /// addScope - Add a scope to the scope. - /// - void addScope(DbgScope *S) { Scopes.push_back(S); } - - /// addVariable - Add a variable to the scope. - /// - void addVariable(DbgVariable *V) { Variables.push_back(V); } - -#ifndef NDEBUG - void dump() const; -#endif -}; - } // end llvm namespace -#ifndef NDEBUG -void DbgScope::dump() const { - raw_ostream &err = dbgs(); - err.indent(IndentLevel); - err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; - const MDNode *N = Desc; - N->dump(); - if (AbstractScope) - err << "Abstract Scope\n"; - - IndentLevel += 2; - if (!Scopes.empty()) - err << "Children ...\n"; - for (unsigned i = 0, e = Scopes.size(); i != e; ++i) - if (Scopes[i] != this) - Scopes[i]->dump(); - - IndentLevel -= 2; -} -#endif - -DbgScope::~DbgScope() { - for (unsigned j = 0, M = Variables.size(); j < M; ++j) - delete Variables[j]; -} - DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), - CurrentFnDbgScope(0), PrevLabel(NULL) { + PrevLabel(NULL) { NextStringPoolNumber = 0; DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; @@ -311,147 +178,12 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { - CompileUnit *SPCU = getCompileUnit(SP); - DIE *SPDie = SPCU->getDIE(SP); - if (SPDie) - return SPDie; - - SPDie = new DIE(dwarf::DW_TAG_subprogram); - - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - SPCU->addToContextOwner(SPDie, SP.getContext()); - - // Add function template parameters. - SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); - - StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) - SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); - - // If this DIE is going to refer declaration info using AT_specification - // then there is no need to add other attributes. - if (SP.getFunctionDeclaration().isSubprogram()) - return SPDie; - - // Constructors and operators for anonymous aggregates do not have names. 
- if (!SP.getName().empty()) - SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - SP.getName()); - - SPCU->addSourceLine(SPDie, SP); - - if (SP.isPrototyped()) - SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - - // Add Return Type. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - - if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - SPCU->addType(SPDie, SPTy); - else - SPCU->addType(SPDie, DIType(Args.getElement(0))); - - unsigned VK = SP.getVirtuality(); - if (VK) { - SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = SPCU->getDIEBlock(); - SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType())); - } - - if (!SP.isDefinition()) { - SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. Do not add arguments for subprogram definition. They will - // be handled while processing variables. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - } - - if (SP.isArtificial()) - SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - - if (!SP.isLocalToUnit()) - SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - - if (SP.isOptimized()) - SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); - - if (unsigned isa = Asm->getISAEncoding()) { - SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - } - - return SPDie; -} - -DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) { - assert(N && "Invalid Scope encoding!"); - - DbgScope *AScope = AbstractScopes.lookup(N); - if (AScope) - return AScope; - - DbgScope *Parent = NULL; - - DIDescriptor Scope(N); - if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); - DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc); - } - - AScope = new DbgScope(Parent, DIDescriptor(N), NULL); - - if (Parent) - Parent->addScope(AScope); - AScope->setAbstractScope(); - AbstractScopes[N] = AScope; - if (DIDescriptor(N).isSubprogram()) - AbstractScopesList.push_back(AScope); - return AScope; -} - -/// isSubprogramContext - Return true if Context is either a subprogram -/// or another context nested inside a subprogram. -static bool isSubprogramContext(const MDNode *Context) { - if (!Context) - return false; - DIDescriptor D(Context); - if (D.isSubprogram()) - return true; - if (D.isType()) - return isSubprogramContext(DIType(Context).getContext()); - return false; -} - /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert /// DIEs for these variables. 
-DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { - CompileUnit *SPCU = getCompileUnit(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, + const MDNode *SPNode) { DIE *SPDie = SPCU->getDIE(SPNode); assert(SPDie && "Unable to find subprogram DIE!"); @@ -461,7 +193,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { if (SPDecl.isSubprogram()) // Refer function declaration directly. SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - createSubprogramDIE(SPDecl)); + SPCU->getOrCreateSubprogramDIE(SPDecl)); else { // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another @@ -514,25 +246,26 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. -DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { +DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, + LexicalScope *Scope) { DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); if (Ranges.empty()) return 0; - CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); - SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); - for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + DebugRangeSymbols.size() + * Asm->getTargetData().getPointerSize()); + for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -559,22 +292,29 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { /// constructInlinedScopeDIE - This scope represents inlined body of /// a function. Construct DIE to represent this concrete inlined copy /// of the function. -DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { +DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, + LexicalScope *Scope) { - const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); assert (Ranges.empty() == false - && "DbgScope does not have instruction markers!"); + && "LexicalScope does not have instruction markers!"); - // FIXME : .debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. - // For now, use first instruction range and emit low_pc/high_pc pair and - // corresponding .debug_inlined section entry for this pair. 
- SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + if (!Scope->getScopeNode()) + return NULL; + DIScope DS(Scope->getScopeNode()); + DISubprogram InlinedSP = getDISubprogram(DS); + DIE *OriginDIE = TheCU->getDIE(InlinedSP); + if (!OriginDIE) { + DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + return NULL; + } + + SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - assert (0 && "Unexpected Start and End labels for a inlined scope!"); + assert (0 && "Unexpected Start and End labels for a inlined scope!"); return 0; } assert(StartLabel->isDefined() && @@ -582,26 +322,38 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); - if (!Scope->getScopeNode()) - return NULL; - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS); - CompileUnit *TheCU = getCompileUnit(InlinedSP); - DIE *OriginDIE = TheCU->getDIE(InlinedSP); - if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); - return NULL; - } DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + if (Ranges.size() > 1) { + // .debug_range section has not been laid out yet. Emit offset in + // .debug_range as a uint, size 4, for now. emitDIE will handle + // DW_AT_ranges appropriately. + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() + * Asm->getTargetData().getPointerSize()); + for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); + DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + } else { + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + EndLabel); + } InlinedSubprogramDIEs.insert(OriginDIE); // Track the start label for this inlined function. + //.debug_inlined section specification does not clearly state how + // to emit inlined scope that is split into multiple instruction ranges. + // For now, use first instruction range and emit low_pc/high_pc pair and + // corresponding .debug_inlined section entry for this pair. DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator I = InlineInfo.find(InlinedSP); @@ -619,200 +371,51 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { return ScopeDIE; } -/// isUnsignedDIType - Return true if type encoding is unsigned. -static bool isUnsignedDIType(DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.Verify()) - return isUnsignedDIType(DTy.getTypeDerivedFrom()); - - DIBasicType BTy(Ty); - if (BTy.Verify()) { - unsigned Encoding = BTy.getEncoding(); - if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char) - return true; - } - return false; -} - -/// constructVariableDIE - Construct a DIE for the given DbgVariable. 
-DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { - StringRef Name = DV->getName(); - if (Name.empty()) - return NULL; - - // Translate tag to proper Dwarf tag. The result variable is dropped for - // now. - unsigned Tag; - switch (DV->getTag()) { - case dwarf::DW_TAG_return_variable: - return NULL; - case dwarf::DW_TAG_arg_variable: - Tag = dwarf::DW_TAG_formal_parameter; - break; - case dwarf::DW_TAG_auto_variable: // fall thru - default: - Tag = dwarf::DW_TAG_variable; - break; - } - - // Define variable debug information entry. - DIE *VariableDie = new DIE(Tag); - CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); - DIE *AbsDIE = NULL; - DenseMap<const DbgVariable *, const DbgVariable *>::iterator - V2AVI = VarToAbstractVarMap.find(DV); - if (V2AVI != VarToAbstractVarMap.end()) - AbsDIE = V2AVI->second->getDIE(); - - if (AbsDIE) - VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); - else { - VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - Name); - VariableCU->addSourceLine(VariableDie, DV->getVariable()); - - // Add variable type. - VariableCU->addType(VariableDie, DV->getType()); - } - - if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); - else if (DIVariable(DV->getVariable()).isArtificial()) - VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); - - if (Scope->isAbstractScope()) { - DV->setDIE(VariableDie); - return VariableDie; - } - - // Add variable address. - - unsigned Offset = DV->getDotDebugLocOffset(); - if (Offset != ~0U) { - VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); - DV->setDIE(VariableDie); - UseDotDebugLocEntry.insert(VariableDie); - return VariableDie; - } - - // Check if variable is described by a DBG_VALUE instruction. 
- DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI = - DbgVariableToDbgInstMap.find(DV); - if (DVI != DbgVariableToDbgInstMap.end()) { - const MachineInstr *DVInsn = DVI->second; - bool updated = false; - // FIXME : Handle getNumOperands != 3 - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - VariableCU->addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - VariableCU->addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - VariableCU->addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - isUnsignedDIType(DV->getType())); - } else { - VariableCU->addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } - DV->setDIE(VariableDie); - return VariableDie; - } - - // .. else use frame index, if available. - int FI = 0; - if (findVariableFrameIndex(DV, &FI)) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - VariableCU->addVariableAddress(DV, VariableDie, Location); - } - - DV->setDIE(VariableDie); - return VariableDie; - -} - /// constructScopeDIE - Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { +DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; SmallVector <DIE *, 8> Children; // Collect arguments for current function. - if (Scope == CurrentFnDbgScope) + if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) - if (DIE *Arg = constructVariableDIE(ArgDV, Scope)) + if (DIE *Arg = + TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) Children.push_back(Arg); - // Collect lexical scope childrens first. - const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); + // Collect lexical scope children first. 
+ const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) - if (DIE *Variable = constructVariableDIE(Variables[i], Scope)) + if (DIE *Variable = + TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) Children.push_back(Variable); - const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) - if (DIE *Nested = constructScopeDIE(Scopes[j])) + if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) - ScopeDIE = constructInlinedScopeDIE(Scope); + ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); else if (DS.isSubprogram()) { ProcessedSPNodes.insert(DS); if (Scope->isAbstractScope()) { - ScopeDIE = getCompileUnit(DS)->getDIE(DS); + ScopeDIE = TheCU->getDIE(DS); // Note down abstract DIE. if (ScopeDIE) AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); } else - ScopeDIE = updateSubprogramScopeDIE(DS); + ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); } else { // There is no need to emit empty lexical block DIE. if (Children.empty()) return NULL; - ScopeDIE = constructLexicalScopeDIE(Scope); + ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); } if (!ScopeDIE) return NULL; @@ -823,7 +426,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); + TheCU->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -862,7 +465,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. -void DwarfDebug::constructCompileUnit(const MDNode *N) { +CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); @@ -893,7 +496,8 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, + Flags); unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) @@ -903,159 +507,19 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); -} - -/// getCompielUnit - Get CompileUnit DIE. -CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const { - assert (N && "Invalid DwarfDebug::getCompileUnit argument!"); - DIDescriptor D(N); - const MDNode *CUNode = NULL; - if (D.isCompileUnit()) - CUNode = N; - else if (D.isSubprogram()) - CUNode = DISubprogram(N).getCompileUnit(); - else if (D.isType()) - CUNode = DIType(N).getCompileUnit(); - else if (D.isGlobalVariable()) - CUNode = DIGlobalVariable(N).getCompileUnit(); - else if (D.isVariable()) - CUNode = DIVariable(N).getCompileUnit(); - else if (D.isNameSpace()) - CUNode = DINameSpace(N).getCompileUnit(); - else if (D.isFile()) - CUNode = DIFile(N).getCompileUnit(); - else - return FirstCU; - - DenseMap<const MDNode *, CompileUnit *>::const_iterator I - = CUMap.find(CUNode); - if (I == CUMap.end()) - return FirstCU; - return I->second; -} - -// Return const exprssion if value is a GEP to access merged global -// constant. e.g. 
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) -static const ConstantExpr *getMergedGlobalExpr(const Value *V) { - const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); - if (!CE || CE->getNumOperands() != 3 || - CE->getOpcode() != Instruction::GetElementPtr) - return NULL; - - // First operand points to a global value. - if (!isa<GlobalValue>(CE->getOperand(0))) - return NULL; - - // Second operand is zero. - const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); - if (!CI || !CI->isZero()) - return NULL; - - // Third operand is offset. - if (!isa<ConstantInt>(CE->getOperand(2))) - return NULL; - - return CE; -} - -/// constructGlobalVariableDIE - Construct global variable DIE. -void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { - DIGlobalVariable GV(N); - - // If debug information is malformed then ignore it. - if (GV.Verify() == false) - return; - - // Check for pre-existence. - CompileUnit *TheCU = getCompileUnit(N); - if (TheCU->getDIE(GV)) - return; - - DIType GTy = GV.getType(); - DIE *VariableDIE = new DIE(GV.getTag()); - - bool isGlobalVariable = GV.getGlobal() != NULL; - - // Add name. - TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty() && isGlobalVariable) - TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); - // Add type. - TheCU->addType(VariableDIE, GTy); - - // Add scoping info. - if (!GV.isLocalToUnit()) { - TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // Expose as global. - TheCU->addGlobal(GV.getName(), VariableDIE); - } - // Add line number info. - TheCU->addSourceLine(VariableDIE, GV); - // Add to map. - TheCU->insertDIE(N, VariableDIE); - // Add to context owner. - DIDescriptor GVContext = GV.getContext(); - TheCU->addToContextOwner(VariableDIE, GVContext); - // Add location. - if (isGlobalVariable) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(GV.getGlobal())); - // Do not create specification DIE if context is either compile unit - // or a subprogram. - if (GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !isSubprogramContext(GVContext)) { - // Create specification DIE. - DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, VariableDIE); - TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - TheCU->addDie(VariableSpecDIE); - } else { - TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } - } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) - TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); - else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { - // GV is a merged global. 
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); - ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); - TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } - - return; + return NewCU; } /// construct SubprogramDIE - Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, + const MDNode *N) { DISubprogram SP(N); - - // Check for pre-existence. - CompileUnit *TheCU = getCompileUnit(N); - if (TheCU->getDIE(N)) - return; - if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing // class type. return; - DIE *SubprogramDie = createSubprogramDIE(SP); + DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); // Add to map. TheCU->insertDIE(N, SubprogramDie); @@ -1066,71 +530,115 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); + SPMap[N] = TheCU; return; } +/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such +/// as llvm.dbg.enum and llvm.dbg.ty +void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) { + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) + constructSubprogramDIE(CU, N); + } + + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) + CU->createGlobalVariableDIE(N); + } + + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) + CU->getOrCreateTypeDIE(Ty); + } + + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) + CU->getOrCreateTypeDIE(Ty); + } +} + +/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. +/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. +bool DwarfDebug::collectLegacyDebugInfo(Module *M) { + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); + + bool HasDebugInfo = false; + // Scan all the compile-units to see if there are any marked as the main + // unit. If not, we do not generate debug info. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + if (DICompileUnit(*I).isMain()) { + HasDebugInfo = true; + break; + } + } + if (!HasDebugInfo) return false; + + // Create all the compile unit DIEs. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) + constructCompileUnit(*I); + + // Create DIEs for each global variable. 
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) { + const MDNode *N = *I; + if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) + CU->createGlobalVariableDIE(N); + } + + // Create DIEs for each subprogram. + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) { + const MDNode *N = *I; + if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) + constructSubprogramDIE(CU, N); + } + + return HasDebugInfo; +} + /// beginModule - Emit all Dwarf sections that should come prior to the /// content. Create global DIEs and emit initial debug info sections. -/// This is inovked by the target AsmPrinter. +/// This is invoked by the target AsmPrinter. void DwarfDebug::beginModule(Module *M) { if (DisableDebugInfoPrinting) return; - // If module has named metadata anchors then use them, otherwise scan the module - // using debug info finder to collect debug info. + // If module has named metadata anchors then use them, otherwise scan the + // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { - - NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv"); - NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp"); - if (!GV_Nodes && !SP_Nodes) - // If there are not any global variables or any functions then - // there is not any debug info in this module. - return; - - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) - constructCompileUnit(CU_Nodes->getOperand(i)); - - if (GV_Nodes) - for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i) - constructGlobalVariableDIE(GV_Nodes->getOperand(i)); - - if (SP_Nodes) - for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i) - constructSubprogramDIE(SP_Nodes->getOperand(i)); - - } else { - - DebugInfoFinder DbgFinder; - DbgFinder.processModule(*M); - - bool HasDebugInfo = false; - // Scan all the compile-units to see if there are any marked as the main unit. - // if not, we do not generate debug info. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - if (DICompileUnit(*I).isMain()) { - HasDebugInfo = true; - break; - } + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CUNode(CU_Nodes->getOperand(i)); + CompileUnit *CU = constructCompileUnit(CUNode); + DIArray GVs = CUNode.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) + CU->createGlobalVariableDIE(GVs.getElement(i)); + DIArray SPs = CUNode.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + constructSubprogramDIE(CU, SPs.getElement(i)); + DIArray EnumTypes = CUNode.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); + DIArray RetainedTypes = CUNode.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); } - if (!HasDebugInfo) return; - - // Create all the compile unit DIEs. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) - constructCompileUnit(*I); - - // Create DIEs for each global variable. 
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) - constructGlobalVariableDIE(*I); - - // Create DIEs for each subprogram. - for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) - constructSubprogramDIE(*I); - } + } else if (!collectLegacyDebugInfo(M)) + return; + + collectInfoFromNamedMDNodes(M); // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); @@ -1138,19 +646,6 @@ void DwarfDebug::beginModule(Module *M) { // Emit initial sections. EmitSectionLabels(); - //getOrCreateTypeDIE - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); - } - // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); } @@ -1160,38 +655,38 @@ void DwarfDebug::beginModule(Module *M) { void DwarfDebug::endModule() { if (!FirstCU) return; const Module *M = MMI->getModule(); - DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap; - if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) { - for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) { - if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue; - DISubprogram SP(AllSPs->getOperand(SI)); - if (!SP.Verify()) continue; - - // Collect info for variables that were optimized out. - if (!SP.isDefinition()) continue; - StringRef FName = SP.getLinkageName(); - if (FName.empty()) - FName = SP.getName(); - NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName); - if (!NMD) continue; - unsigned E = NMD->getNumOperands(); - if (!E) continue; - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL); - DeadFnScopeMap[SP] = Scope; - for (unsigned I = 0; I != E; ++I) { - DIVariable DV(NMD->getOperand(I)); - if (!DV.Verify()) continue; - Scope->addVariable(new DbgVariable(DV)); - } + DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap; - // Construct subprogram DIE and add variables DIEs. - constructSubprogramDIE(SP); - DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP); - const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); - if (VariableDIE) - ScopeDIE->addChild(VariableDIE); + // Collect info for variables that were optimized out. + if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit TheCU(CU_Nodes->getOperand(i)); + DIArray Subprograms = TheCU.getSubprograms(); + for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { + DISubprogram SP(Subprograms.getElement(i)); + if (ProcessedSPNodes.count(SP) != 0) continue; + if (!SP.Verify()) continue; + if (!SP.isDefinition()) continue; + DIArray Variables = SP.getVariables(); + if (Variables.getNumElements() == 0) continue; + + LexicalScope *Scope = + new LexicalScope(NULL, DIDescriptor(SP), NULL, false); + DeadFnScopeMap[SP] = Scope; + + // Construct subprogram DIE and add variables DIEs. 
+ CompileUnit *SPCU = CUMap.lookup(TheCU); + assert (SPCU && "Unable to find Compile Unit!"); + constructSubprogramDIE(SPCU, SP); + DIE *ScopeDIE = SPCU->getDIE(SP); + for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { + DIVariable DV(Variables.getElement(vi)); + if (!DV.Verify()) continue; + DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (DIE *VariableDIE = + SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) + ScopeDIE->addChild(VariableDIE); + } } } } @@ -1203,15 +698,12 @@ void DwarfDebug::endModule() { FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } - for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); CI != CE; ++CI) { - DIE *SPDie = CI->first; - const MDNode *N = dyn_cast_or_null<MDNode>(CI->second); - if (!N) continue; - DIE *NDie = getCompileUnit(N)->getDIE(N); - if (!NDie) continue; - getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, - dwarf::DW_FORM_ref4, NDie); + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. + for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), + CUE = CUMap.end(); CUI != CUE; ++CUI) { + CompileUnit *TheCU = CUI->second; + TheCU->constructContainingTypeDIEs(); } // Standard sections final addresses. @@ -1261,6 +753,7 @@ void DwarfDebug::endModule() { // clean up. DeleteContainerSeconds(DeadFnScopeMap); + SPMap.clear(); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) delete I->second; @@ -1268,29 +761,30 @@ void DwarfDebug::endModule() { } /// findAbstractVariable - Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, DebugLoc ScopeLoc) { - + LLVMContext &Ctx = DV->getContext(); + // More then one inlined variable corresponds to one abstract variable. + DIVariable Var = cleanseInlinedVariable(DV, Ctx); DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var); if (AbsDbgVariable) return AbsDbgVariable; - LLVMContext &Ctx = Var->getContext(); - DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx)); + LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx)); if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var); - Scope->addVariable(AbsDbgVariable); + AbsDbgVariable = new DbgVariable(Var, NULL); + addScopeVariable(Scope, AbsDbgVariable); AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; } -/// addCurrentFnArgument - If Var is an current function argument that add -/// it in CurrentFnArguments list. +/// addCurrentFnArgument - If Var is a current function argument then add +/// it to CurrentFnArguments list. bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, DbgScope *Scope) { - if (Scope != CurrentFnDbgScope) + DbgVariable *Var, LexicalScope *Scope) { + if (!LScopes.isCurrentFunctionScope(Scope)) return false; DIVariable DV = Var->getVariable(); if (DV.getTag() != dwarf::DW_TAG_arg_variable) @@ -1313,7 +807,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. 
void -DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, +DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), @@ -1324,21 +818,19 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, DIVariable DV(Var); const std::pair<unsigned, DebugLoc> &VP = VI->second; - DbgScope *Scope = findDbgScope(VP.second); + LexicalScope *Scope = LScopes.findLexicalScope(VP.second); // If variable scope is not found then skip this variable. if (Scope == 0) continue; DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); - DbgVariable *RegVar = new DbgVariable(DV); - recordVariableFrameIndex(RegVar, VP.first); + DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable); + RegVar->setFrameIndex(VP.first); if (!addCurrentFnArgument(MF, RegVar, Scope)) - Scope->addVariable(RegVar); - if (AbsDbgVariable) { - recordVariableFrameIndex(AbsDbgVariable, VP.first); - VarToAbstractVarMap[RegVar] = AbsDbgVariable; - } + addScopeVariable(Scope, RegVar); + if (AbsDbgVariable) + AbsDbgVariable->setFrameIndex(VP.first); } } @@ -1351,7 +843,7 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } -/// getDebugLocEntry - Get .debug_loc entry for the instraction range starting +/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting /// at MI. static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MCSymbol *FLabel, @@ -1379,7 +871,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, return DotDebugLocEntry(); } -/// collectVariableInfo - Populate DbgScope entries with variables' info. +/// collectVariableInfo - Find variables for each lexical scope. void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { @@ -1402,30 +894,37 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, const MachineInstr *MInsn = History.front(); DIVariable DV(Var); - DbgScope *Scope = NULL; + LexicalScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) - Scope = CurrentFnDbgScope; - else - Scope = findDbgScope(MInsn->getDebugLoc()); + Scope = LScopes.getCurrentFunctionScope(); + else { + if (DV.getVersion() <= LLVMDebugVersion9) + Scope = LScopes.findLexicalScope(MInsn->getDebugLoc()); + else { + if (MDNode *IA = DV.getInlinedAt()) + Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); + else + Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1))); + } + } // If variable scope is not found then skip this variable. if (!Scope) continue; Processed.insert(DV); assert(MInsn->isDebugValue() && "History must begin with debug value"); - DbgVariable *RegVar = new DbgVariable(DV); + DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar); if (!addCurrentFnArgument(MF, RegVar, Scope)) - Scope->addVariable(RegVar); - if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { - DbgVariableToDbgInstMap[AbsVar] = MInsn; - VarToAbstractVarMap[RegVar] = AbsVar; - } + addScopeVariable(Scope, RegVar); + if (AbsVar) + AbsVar->setMInsn(MInsn); // Simple ranges that are fully coalesced. 
if (History.size() <= 1 || (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { - DbgVariableToDbgInstMap[RegVar] = MInsn; + RegVar->setMInsn(MInsn); continue; } @@ -1471,16 +970,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // Collect info for variables that were optimized out. - const Function *F = MF->getFunction(); - if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) { - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIVariable DV(cast<MDNode>(NMD->getOperand(i))); - if (!DV || !Processed.insert(DV)) - continue; - DbgScope *Scope = DbgScopeMap.lookup(DV.getContext()); - if (Scope) - Scope->addVariable(new DbgVariable(DV)); - } + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + if (!DV || !DV.Verify() || !Processed.insert(DV)) + continue; + if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) + addScopeVariable(Scope, new DbgVariable(DV, NULL)); } } @@ -1561,237 +1058,33 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) { I->second = PrevLabel; } -/// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) { - LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - MDNode *Scope = NULL; - MDNode *InlinedAt = NULL; - DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx); - - if (!InlinedAt) { - DbgScope *WScope = DbgScopeMap.lookup(Scope); - if (WScope) - return WScope; - WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); - DbgScopeMap.insert(std::make_pair(Scope, WScope)); - if (DIDescriptor(Scope).isLexicalBlock()) { - DbgScope *Parent = - getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope)); - WScope->setParent(Parent); - Parent->addScope(WScope); - } else if (DIDescriptor(Scope).isSubprogram() - && DISubprogram(Scope).describes(Asm->MF->getFunction())) - CurrentFnDbgScope = WScope; - - return WScope; - } - - getOrCreateAbstractScope(Scope); - DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); - if (WScope) - return WScope; - - WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); - DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); - InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope; - DbgScope *Parent = - getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt)); - WScope->setParent(Parent); - Parent->addScope(WScope); - return WScope; -} - -/// calculateDominanceGraph - Calculate dominance graph for DbgScope -/// hierarchy. -static void calculateDominanceGraph(DbgScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); - SmallVector<DbgScope *, 4> WorkStack; - WorkStack.push_back(Scope); - unsigned Counter = 0; - while (!WorkStack.empty()) { - DbgScope *WS = WorkStack.back(); - const SmallVector<DbgScope *, 4> &Children = WS->getScopes(); - bool visitedChildren = false; - for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), - SE = Children.end(); SI != SE; ++SI) { - DbgScope *ChildScope = *SI; - if (!ChildScope->getDFSOut()) { - WorkStack.push_back(ChildScope); - visitedChildren = true; - ChildScope->setDFSIn(++Counter); - break; - } - } - if (!visitedChildren) { - WorkStack.pop_back(); - WS->setDFSOut(++Counter); - } - } -} - -/// printDbgScopeInfo - Print DbgScope info for each machine instruction. 
-static -void printDbgScopeInfo(const MachineFunction *MF, - DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) -{ -#ifndef NDEBUG - LLVMContext &Ctx = MF->getFunction()->getContext(); - unsigned PrevDFSIn = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - MDNode *Scope = NULL; - MDNode *InlinedAt = NULL; - - // Check if instruction has valid location information. - DebugLoc MIDL = MInsn->getDebugLoc(); - if (!MIDL.isUnknown()) { - MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx); - dbgs() << " [ "; - if (InlinedAt) - dbgs() << "*"; - DenseMap<const MachineInstr *, DbgScope *>::iterator DI = - MI2ScopeMap.find(MInsn); - if (DI != MI2ScopeMap.end()) { - DbgScope *S = DI->second; - dbgs() << S->getDFSIn(); - PrevDFSIn = S->getDFSIn(); - } else - dbgs() << PrevDFSIn; - } else - dbgs() << " [ x" << PrevDFSIn; - dbgs() << " ]"; - MInsn->dump(); - } - dbgs() << "\n"; - } -#endif -} -/// extractScopeInformation - Scan machine instructions in this function -/// and collect DbgScopes. Return true, if at least one scope was found. -bool DwarfDebug::extractScopeInformation() { - // If scope information was extracted using .dbg intrinsics then there is not - // any need to extract these information by scanning each instruction. - if (!DbgScopeMap.empty()) - return false; - - // Scan each instruction and create scopes. First build working set of scopes. - SmallVector<DbgRange, 4> MIRanges; - DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap; - DebugLoc PrevDL; - const MachineInstr *RangeBeginMI = NULL; - const MachineInstr *PrevMI = NULL; - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - - // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn->getDebugLoc(); - if (MIDL.isUnknown()) { - PrevMI = MInsn; - continue; - } - - // If scope has not changed then skip this instruction. - if (MIDL == PrevDL) { - PrevMI = MInsn; - continue; - } - - // Ignore DBG_VALUE. It does not contribute any instruction in output. - if (MInsn->isDebugValue()) - continue; - - if (RangeBeginMI) { - // If we have alread seen a beginning of a instruction range and - // current instruction scope does not match scope of first instruction - // in this range then create a new instruction range. - DEBUG(dbgs() << "Creating new instruction range :\n"); - DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI); - DEBUG(dbgs() << "End Range at " << *PrevMI); - DEBUG(dbgs() << "Next Range starting at " << *MInsn); - DEBUG(dbgs() << "------------------------\n"); - DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL); - MIRanges.push_back(R); - } - - // This is a beginning of a new instruction range. - RangeBeginMI = MInsn; - - // Reset previous markers. - PrevMI = MInsn; - PrevDL = MIDL; - } - } - - // Create last instruction range. 
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { - DbgRange R(RangeBeginMI, PrevMI); - MIRanges.push_back(R); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL); - } - - if (!CurrentFnDbgScope) - return false; - - calculateDominanceGraph(CurrentFnDbgScope); - if (PrintDbgScope) - printDbgScopeInfo(Asm->MF, MI2ScopeMap); - - // Find ranges of instructions covered by each DbgScope; - DbgScope *PrevDbgScope = NULL; - for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); RI != RE; ++RI) { - const DbgRange &R = *RI; - DbgScope *S = MI2ScopeMap.lookup(R.first); - assert (S && "Lost DbgScope for a machine instruction!"); - if (PrevDbgScope && !PrevDbgScope->dominates(S)) - PrevDbgScope->closeInsnRange(S); - S->openInsnRange(R.first); - S->extendInsnRange(R.second); - PrevDbgScope = S; - } - - if (PrevDbgScope) - PrevDbgScope->closeInsnRange(); - - identifyScopeMarkers(); - - return !DbgScopeMap.empty(); -} - /// identifyScopeMarkers() - -/// Each DbgScope has first instruction and last instruction to mark beginning -/// and end of a scope respectively. Create an inverse map that list scopes -/// starts (and ends) with an instruction. One instruction may start (or end) -/// multiple scopes. Ignore scopes that are not reachable. +/// Each LexicalScope has first instruction and last instruction to mark +/// beginning and end of a scope respectively. Create an inverse map that list +/// scopes starts (and ends) with an instruction. One instruction may start (or +/// end) multiple scopes. Ignore scopes that are not reachable. void DwarfDebug::identifyScopeMarkers() { - SmallVector<DbgScope *, 4> WorkList; - WorkList.push_back(CurrentFnDbgScope); + SmallVector<LexicalScope *, 4> WorkList; + WorkList.push_back(LScopes.getCurrentFunctionScope()); while (!WorkList.empty()) { - DbgScope *S = WorkList.pop_back_val(); + LexicalScope *S = WorkList.pop_back_val(); - const SmallVector<DbgScope *, 4> &Children = S->getScopes(); + const SmallVector<LexicalScope *, 4> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector<DbgRange, 4> &Ranges = S->getRanges(); + const SmallVector<InsnRange, 4> &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { - assert(RI->first && "DbgRange does not have first instruction!"); - assert(RI->second && "DbgRange does not have second instruction!"); + assert(RI->first && "InsnRange does not have first instruction!"); + assert(RI->second && "InsnRange does not have second instruction!"); requestLabelBeforeInsn(RI->first); requestLabelAfterInsn(RI->second); } @@ -1819,7 +1112,9 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { /// emitted immediately after the function entry point. 
void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; - if (!extractScopeInformation()) return; + LScopes.initialize(*MF); + if (LScopes.empty()) return; + identifyScopeMarkers(); FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); @@ -1953,7 +1248,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *Prev = History.back(); if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); - MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); + MachineBasicBlock::const_iterator LastMI = + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); @@ -1985,110 +1281,73 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } } +void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { +// SmallVector<DbgVariable *, 8> &Vars = ScopeVariables.lookup(LS); + ScopeVariables[LS].push_back(Var); +// Vars.push_back(Var); +} + /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return; - - if (CurrentFnDbgScope) { + if (!MMI->hasDebugInfo() || LScopes.empty()) return; - // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", - Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionEndSym); - - SmallPtrSet<const MDNode *, 16> ProcessedVars; - collectVariableInfo(MF, ProcessedVars); - - // Construct abstract scopes. - for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) { - DISubprogram SP((*AI)->getScopeNode()); - if (SP.Verify()) { - // Collect info for variables that were optimized out. - StringRef FName = SP.getLinkageName(); - if (FName.empty()) - FName = SP.getName(); - if (NamedMDNode *NMD = - getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) { - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIVariable DV(cast<MDNode>(NMD->getOperand(i))); - if (!DV || !ProcessedVars.insert(DV)) - continue; - DbgScope *Scope = AbstractScopes.lookup(DV.getContext()); - if (Scope) - Scope->addVariable(new DbgVariable(DV)); - } - } + // Define end label for subprogram. + FunctionEndSym = Asm->GetTempSymbol("func_end", + Asm->getFunctionNumber()); + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(FunctionEndSym); + + SmallPtrSet<const MDNode *, 16> ProcessedVars; + collectVariableInfo(MF, ProcessedVars); + + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert (TheCU && "Unable to find compile unit!"); + + // Construct abstract scopes. + ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList(); + for (unsigned i = 0, e = AList.size(); i != e; ++i) { + LexicalScope *AScope = AList[i]; + DISubprogram SP(AScope->getScopeNode()); + if (SP.Verify()) { + // Collect info for variables that were optimized out. 
+ DIArray Variables = SP.getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + continue; + if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) + addScopeVariable(Scope, new DbgVariable(DV, NULL)); } - if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0) - constructScopeDIE(*AI); } - - DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); - - if (!DisableFramePointerElim(*MF)) - getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, - dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); - - - DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), - MMI->getFrameMoves())); + if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) + constructScopeDIE(TheCU, AScope); } + + DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); + + if (!DisableFramePointerElim(*MF)) + TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); + + DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), + MMI->getFrameMoves())); // Clear debug info - CurrentFnDbgScope = NULL; + for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) + DeleteContainerPointers(I->second); + ScopeVariables.clear(); DeleteContainerPointers(CurrentFnArguments); - DbgVariableToFrameIndexMap.clear(); - VarToAbstractVarMap.clear(); - DbgVariableToDbgInstMap.clear(); - InlinedDbgScopeMap.clear(); - DeleteContainerSeconds(DbgScopeMap); UserVariables.clear(); DbgValues.clear(); - DeleteContainerSeconds(AbstractScopes); - AbstractScopesList.clear(); AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); PrevLabel = NULL; } -/// recordVariableFrameIndex - Record a variable's index. -void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) { - assert (V && "Invalid DbgVariable!"); - DbgVariableToFrameIndexMap[V] = Index; -} - -/// findVariableFrameIndex - Return true if frame index for the variable -/// is found. Update FI to hold value of the index. -bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) { - assert (V && "Invalid DbgVariable!"); - DenseMap<const DbgVariable *, int>::iterator I = - DbgVariableToFrameIndexMap.find(V); - if (I == DbgVariableToFrameIndexMap.end()) - return false; - *FI = I->second; - return true; -} - -/// findDbgScope - Find DbgScope for the debug loc. -DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) { - if (DL.isUnknown()) - return NULL; - - DbgScope *Scope = NULL; - LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - if (MDNode *IA = DL.getInlinedAt(Ctx)) - Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA)); - else - Scope = DbgScopeMap.lookup(DL.getScope(Ctx)); - return Scope; -} - - /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
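A minimal sketch (not part of the patch) of the scope-to-file dispatch that the next hunk extends in DwarfDebug::recordSourceLine: it assumes only the DIDescriptor predicates and the getFilename()/getDirectory() accessors that appear in this diff, the helper name and out-parameters are illustrative, and the full function also handles other scope kinds not shown here.

    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // Resolve the file name and directory for a scope MDNode, mirroring the
    // branches visible in the recordSourceLine hunk below. Returns false for
    // scope kinds this sketch does not cover.
    static bool resolveScopeFile(const MDNode *S, StringRef &Fn, StringRef &Dir) {
      DIDescriptor Scope(S);
      if (Scope.isSubprogram()) {
        DISubprogram SP(S);
        Fn = SP.getFilename(); Dir = SP.getDirectory();
      } else if (Scope.isLexicalBlockFile()) {  // branch added by this patch
        DILexicalBlockFile DBF(S);
        Fn = DBF.getFilename(); Dir = DBF.getDirectory();
      } else if (Scope.isLexicalBlock()) {
        DILexicalBlock DB(S);
        Fn = DB.getFilename(); Dir = DB.getDirectory();
      } else {
        return false;
      }
      return true;
    }

The resulting pair feeds GetOrCreateSourceID and, from there, the EmitDwarfLocDirective call shown at the end of the hunk.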
@@ -2112,6 +1371,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, DISubprogram SP(S); Fn = SP.getFilename(); Dir = SP.getDirectory(); + } else if (Scope.isLexicalBlockFile()) { + DILexicalBlockFile DBF(S); + Fn = DBF.getFilename(); + Dir = DBF.getDirectory(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); Fn = DB.getFilename(); @@ -2121,8 +1384,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Src = GetOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, - 0, 0, Fn); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -2235,7 +1497,7 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDataSection()); } -/// emitDIE - Recusively Emits a debug information entry. +/// emitDIE - Recursively emits a debug information entry. /// void DwarfDebug::emitDIE(DIE *Die) { // Get the abbreviation for this DIE. @@ -2290,10 +1552,9 @@ void DwarfDebug::emitDIE(DIE *Die) { break; } case dwarf::DW_AT_location: { - if (UseDotDebugLocEntry.count(Die) != 0) { - DIELabel *L = cast<DIELabel>(Values[i]); + if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); - } else + else Values[i]->EmitValue(Asm, Form); break; } @@ -2464,7 +1725,7 @@ void DwarfDebug::emitDebugPubNames() { Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - TheCU->getID())); + TheCU->getID())); } } @@ -2499,7 +1760,7 @@ void DwarfDebug::emitDebugPubTypes() { for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE * Entity = GI->second; + DIE *Entity = GI->second; if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index b245006..35653be 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -15,7 +15,8 @@ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" @@ -30,7 +31,6 @@ namespace llvm { class CompileUnit; class DbgConcreteScope; -class DbgScope; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; @@ -125,9 +125,14 @@ class DbgVariable { DIVariable Var; // Variable Descriptor. DIE *TheDIE; // Variable DIE. unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. + DbgVariable *AbsVar; // Corresponding Abstract variable, if any. + const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. + int FrameIndex; public: // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + DbgVariable(DIVariable V, DbgVariable *AV) + : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), + FrameIndex(~0) {} // Accessors. 
DIVariable getVariable() const { return Var; } @@ -136,7 +141,27 @@ public: void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } + DbgVariable *getAbstractVariable() const { return AbsVar; } + const MachineInstr *getMInsn() const { return MInsn; } + void setMInsn(const MachineInstr *M) { MInsn = M; } + int getFrameIndex() const { return FrameIndex; } + void setFrameIndex(int FI) { FrameIndex = FI; } + // Translate tag to proper Dwarf tag. + unsigned getTag() const { + if (Var.getTag() == dwarf::DW_TAG_arg_variable) + return dwarf::DW_TAG_formal_parameter; + + return dwarf::DW_TAG_variable; + } + /// isArtificial - Return true if DbgVariable is artificial. + bool isArtificial() const { + if (Var.isArtificial()) + return true; + if (Var.getTag() == dwarf::DW_TAG_arg_variable + && getType().isArtificial()) + return true; + return false; + } bool variableHasComplexAddress() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); @@ -167,8 +192,13 @@ class DwarfDebug { // CompileUnit *FirstCU; + + /// Maps MDNode with its corresponding CompileUnit. DenseMap <const MDNode *, CompileUnit *> CUMap; + /// Maps subprogram MDNode with its corresponding CompileUnit. + DenseMap <const MDNode *, CompileUnit *> SPMap; + /// AbbreviationsSet - Used to uniquely define abbreviations. /// FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -192,63 +222,27 @@ class DwarfDebug { /// UniqueVector<const MCSection*> SectionMap; - /// CurrentFnDbgScope - Top level scope for the current function. - /// - DbgScope *CurrentFnDbgScope; - /// CurrentFnArguments - List of Arguments (DbgValues) for current function. SmallVector<DbgVariable *, 8> CurrentFnArguments; - /// DbgScopeMap - Tracks the scopes in the current function. Owns the - /// contained DbgScope*s. - DenseMap<const MDNode *, DbgScope *> DbgScopeMap; - - /// InlinedDbgScopeMap - Tracks inlined function scopes in current function. - DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap; - - /// AbstractScopes - Tracks the abstract scopes a module. These scopes are - /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s. - DenseMap<const MDNode *, DbgScope *> AbstractScopes; + LexicalScopes LScopes; /// AbstractSPDies - Collection of abstract subprogram DIEs. DenseMap<const MDNode *, DIE *> AbstractSPDies; - /// AbstractScopesList - Tracks abstract scopes constructed while processing - /// a function. This list is cleared during endFunction(). - SmallVector<DbgScope *, 4>AbstractScopesList; + /// ScopeVariables - Collection of dbg variables of a scope. + DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; - /// AbstractVariables - Collection on abstract variables. Owned by the - /// DbgScopes in AbstractScopes. + /// AbstractVariables - Collection on abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; - /// DbgVariableToFrameIndexMap - Tracks frame index used to find - /// variable's value. - DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap; - - /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE - /// machine instruction. - DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap; - /// DotDebugLocEntries - Collection of DotDebugLocEntry. 
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; - /// UseDotDebugLocEntry - DW_AT_location attributes for the DIEs in this set - /// idetifies corresponding .debug_loc entry offset. - SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry; - - /// VarToAbstractVarMap - Maps DbgVariable with corresponding Abstract - /// DbgVariable, if any. - DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap; - /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; - /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that - /// need DW_AT_containing_type attribute. This attribute points to a DIE that - /// corresponds to the MDNode mapped with the subprogram DIE. - DenseMap<DIE *, const MDNode *> ContainingTypeMap; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; @@ -316,10 +310,7 @@ private: /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// getOrCreateDbgScope - Create DbgScope for the scope. - DbgScope *getOrCreateDbgScope(DebugLoc DL); - - DbgScope *getOrCreateAbstractScope(const MDNode *N); + void addScopeVariable(LexicalScope *LS, DbgVariable *Var); /// findAbstractVariable - Find abstract variable associated with Var. DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); @@ -328,22 +319,22 @@ private: /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert /// DIEs for these variables. - DIE *updateSubprogramScopeDIE(const MDNode *SPNode); + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. - DIE *constructLexicalScopeDIE(DbgScope *Scope); + DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); /// constructInlinedScopeDIE - This scope represents inlined body of /// a function. Construct DIE to represent this concrete inlined copy /// of the function. - DIE *constructInlinedScopeDIE(DbgScope *Scope); + DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S); + DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S); /// constructScopeDIE - Construct a DIE for this scope. - DIE *constructScopeDIE(DbgScope *Scope); + DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); /// EmitSectionLabels - Emit initial Dwarf sections with a label at /// the start of each one. @@ -424,16 +415,10 @@ private: /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. - void constructCompileUnit(const MDNode *N); - - /// getCompielUnit - Get CompileUnit DIE. - CompileUnit *getCompileUnit(const MDNode *N) const; - - /// constructGlobalVariableDIE - Construct global variable DIE. - void constructGlobalVariableDIE(const MDNode *N); + CompileUnit *constructCompileUnit(const MDNode *N); /// construct SubprogramDIE - Construct subprogram DIE. - void constructSubprogramDIE(const MDNode *N); + void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); /// recordSourceLine - Register a source line with debug info. 
Returns the /// unique label that was emitted and which provides correspondence to @@ -441,30 +426,16 @@ private: void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// recordVariableFrameIndex - Record a variable's index. - void recordVariableFrameIndex(const DbgVariable *V, int Index); - - /// findVariableFrameIndex - Return true if frame index for the variable - /// is found. Update FI to hold value of the index. - bool findVariableFrameIndex(const DbgVariable *V, int *FI); - - /// findDbgScope - Find DbgScope for the debug loc. - DbgScope *findDbgScope(DebugLoc DL); - /// identifyScopeMarkers() - Indentify instructions that are marking /// beginning of or end of a scope. void identifyScopeMarkers(); - /// extractScopeInformation - Scan machine instructions in this function - /// and collect DbgScopes. Return true, if atleast one scope was found. - bool extractScopeInformation(); - /// addCurrentFnArgument - If Var is an current function argument that add /// it in CurrentFnArguments list. bool addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, DbgScope *Scope); + DbgVariable *Var, LexicalScope *Scope); - /// collectVariableInfo - Populate DbgScope entries with variables' info. + /// collectVariableInfo - Populate LexicalScope entries with variables' info. void collectVariableInfo(const MachineFunction *, SmallPtrSet<const MDNode *, 16> &ProcessedVars); @@ -496,6 +467,14 @@ public: DwarfDebug(AsmPrinter *A, Module *M); ~DwarfDebug(); + /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such + /// as llvm.dbg.enum and llvm.dbg.ty + void collectInfoFromNamedMDNodes(Module *M); + + /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. + /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. + bool collectLegacyDebugInfo(Module *M); + /// beginModule - Emit all Dwarf sections that should come prior to the /// content. void beginModule(Module *M); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp index 1f992fa..18b726b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -527,29 +526,26 @@ void DwarfException::EmitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { const CallSiteEntry &S = *I; + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. if (VerboseAsm) { - // Emit comments that decode the call site. Asm->OutStreamer.AddComment(Twine(">> Call Site ") + llvm::utostr(idx) + " <<"); Asm->OutStreamer.AddComment(Twine(" On exception at call site ") + llvm::utostr(idx)); + } + Asm->EmitULEB128(idx); + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. 
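// For orientation, under the standard Itanium/GCC LSDA convention (not code
// from this patch) the personality routine recovers the record as
//
//     ActionRecord = CSAction ? ActionTableStart + (CSAction - 1) : 0;
//
// so an emitted 0 never indexes the table (cleanup only), and any other value
// is a 1-biased byte offset from the start of the action table.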
+ if (VerboseAsm) { if (S.Action == 0) Asm->OutStreamer.AddComment(" Action: cleanup"); else Asm->OutStreamer.AddComment(Twine(" Action: ") + llvm::utostr((S.Action - 1) / 2 + 1)); - - Asm->OutStreamer.AddBlankLine(); } - - // Offset of the landing pad, counted in 16-byte bundles relative to the - // @LPStart address. - Asm->EmitULEB128(idx); - - // Offset of the first associated action record, relative to the start of - // the action table. This value is biased by 1 (1 indicates the start of - // the action table), and 0 indicates that there are no actions. Asm->EmitULEB128(S.Action); } } else { @@ -595,46 +591,43 @@ void DwarfException::EmitExceptionTable() { if (EndLabel == 0) EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - if (VerboseAsm) { - // Emit comments that decode the call site. - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(++Entry) + " <<"); - Asm->OutStreamer.AddComment(Twine(" Call between ") + - BeginLabel->getName() + " and " + - EndLabel->getName()); - - if (!S.PadLabel) { - Asm->OutStreamer.AddComment(" has no landing pad"); - } else { - Asm->OutStreamer.AddComment(Twine(" jumps to ") + - S.PadLabel->getName()); - - if (S.Action == 0) - Asm->OutStreamer.AddComment(" On action: cleanup"); - else - Asm->OutStreamer.AddComment(Twine(" On action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); - } - - Asm->OutStreamer.AddBlankLine(); - } // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine(">> Call Site ") + + llvm::utostr(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) + if (!S.PadLabel) { + if (VerboseAsm) + Asm->OutStreamer.AddComment(" has no landing pad"); Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - else + } else { + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine(" jumps to ") + + S.PadLabel->getName()); Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); + } // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer.AddComment(" On action: cleanup"); + else + Asm->OutStreamer.AddComment(Twine(" On action: ") + + llvm::utostr((S.Action - 1) / 2 + 1)); + } Asm->EmitULEB128(S.Action); } } @@ -649,13 +642,29 @@ void DwarfException::EmitExceptionTable() { // Emit comments that decode the action table. Asm->OutStreamer.AddComment(Twine(">> Action Record ") + llvm::utostr(++Entry) + " <<"); - if (Action.ValueForTypeID >= 0) + } + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. 
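// For reference, the personality routine reads the value emitted below with
// the usual convention: a positive value is a 1-based index of a catch
// clause's type in the type table, a negative value selects an
// exception-specification filter list, and zero requests cleanup only,
// exactly as the verbose-asm comments that follow spell out.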
+ if (VerboseAsm) { + if (Action.ValueForTypeID > 0) Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") + llvm::itostr(Action.ValueForTypeID)); - else + else if (Action.ValueForTypeID < 0) Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") + llvm::itostr(Action.ValueForTypeID)); + else + Asm->OutStreamer.AddComment(" Cleanup"); + } + Asm->EmitSLEB128(Action.ValueForTypeID); + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + if (VerboseAsm) { if (Action.NextAction == 0) { Asm->OutStreamer.AddComment(" No further actions"); } else { @@ -663,20 +672,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment(Twine(" Continue to action ") + llvm::utostr(NextAction)); } - - Asm->OutStreamer.AddBlankLine(); } - - // Type Filter - // - // Used by the runtime to match the type of the thrown exception to the - // type of the catch clauses or the types in the exception specification. - Asm->EmitSLEB128(Action.ValueForTypeID); - - // Action Record - // - // Self-relative signed displacement in bytes of the next action record, - // or 0 if there is no next action record. Asm->EmitSLEB128(Action.NextAction); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index c2ad5eb..b83aa5a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 99090a8..75288b0 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -1624,26 +1624,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) break; + // Remove kills from LocalDefsSet, these registers had short live ranges. + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || !LocalDefsSet.count(Reg)) + continue; + for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + LocalDefsSet.erase(*OR); + } + // Track local defs so we can update liveins. 
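// (getOverlaps(Reg) returns a null-terminated list containing Reg itself plus
// every register that shares any part of it, i.e. sub- and super-registers,
// so inserting and erasing over that list keeps LocalDefsSet consistent for
// partial defs and kills.)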
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { MachineOperand &MO = TIB->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - if (MO.isDef()) { - if (!MO.isDead()) { - LocalDefs.push_back(Reg); - LocalDefsSet.insert(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - LocalDefsSet.insert(*SR); - } - } else if (MO.isKill() && LocalDefsSet.count(Reg)) { - LocalDefsSet.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - LocalDefsSet.erase(*SR); - } + LocalDefs.push_back(Reg); + for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + LocalDefsSet.insert(*OR); } HasDups = true;; diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index e6b3bbc..ea16a25 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -185,35 +185,3 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { li.weight = normalizeSpillWeight(totalWeight, li.getSize()); } - -void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - const TargetRegisterClass *OldRC = MRI.getRegClass(reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - - // Stop early if there is no room to grow. - if (NewRC == OldRC) - return; - - // Accumulate constraints from all uses. - for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), - E = MRI.reg_nodbg_end(); I != E; ++I) { - // TRI doesn't have accurate enough information to model this yet. - if (I.getOperand().getSubReg()) - return; - // Inline asm instuctions don't remember their constraints. 
- if (I->isInlineAsm()) - return; - const TargetRegisterClass *OpRC = - TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI); - if (OpRC) - NewRC = getCommonSubClass(NewRC, OpRC); - if (!NewRC || NewRC == OldRC) - return; - } - DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) - << " to " << NewRC->getName() <<".\n"); - MRI.setRegClass(reg, NewRC); -} diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 489746c..424535b 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -27,6 +27,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveIntervalsPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); + initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 03604b0..ed9e409 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -63,6 +63,8 @@ namespace { typedef SmallPtrSet<BasicBlock*, 8> BBSet; BBSet LandingPads; + bool InsertUnwindResumeCalls(); + bool NormalizeLandingPads(); bool LowerUnwindsAndResumes(); bool MoveExceptionValueCalls(); @@ -658,13 +660,76 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); } +/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present +/// into calls to the appropriate _Unwind_Resume function. +bool DwarfEHPrepare::InsertUnwindResumeCalls() { + bool UsesNewEH = false; + SmallVector<ResumeInst*, 16> Resumes; + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) + Resumes.push_back(RI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) + UsesNewEH = II->getUnwindDest()->isLandingPad(); + } + + if (Resumes.empty()) + return UsesNewEH; + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + LLVMContext &Ctx = Resumes[0]->getContext(); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Type::getInt8PtrTy(Ctx), false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + // Create the basic block where the _Unwind_Resume call will live. + LLVMContext &Ctx = F->getContext(); + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(), + "exn.obj", UnwindBB); + + // Extract the exception object from the ResumeInst and add it to the PHI node + // that feeds the _Unwind_Resume call. + BasicBlock *UnwindBBDom = Resumes[0]->getParent(); + for (SmallVectorImpl<ResumeInst*>::iterator + I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { + ResumeInst *RI = *I; + BranchInst::Create(UnwindBB, RI->getParent()); + ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0), + 0, "exn.obj", RI); + PN->addIncoming(ExnObj, RI->getParent()); + UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom); + RI->eraseFromParent(); + } + + // Call the function. 
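// For illustration (block and value names invented, landingpad type assumed
// to be the usual { i8*, i32 }), a caller with two resume instructions ends
// up with one shared resume block of roughly this shape:
//
//   lpad1:                                   ; originally ended in 'resume'
//     %exn1 = extractvalue { i8*, i32 } %lp1, 0
//     br label %unwind_resume
//   ...
//   unwind_resume:
//     %exn.obj = phi i8* [ %exn1, %lpad1 ], [ %exn2, %lpad2 ]
//     call void @_Unwind_Resume(i8* %exn.obj)
//     unreachable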
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. + new UnreachableInst(Ctx, UnwindBB); + + // Now update DominatorTree analysis information. + DT->addNewBlock(UnwindBB, UnwindBBDom); + return true; +} + bool DwarfEHPrepare::runOnFunction(Function &Fn) { bool Changed = false; // Initialize internal state. - DT = &getAnalysis<DominatorTree>(); + DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH. F = &Fn; + if (InsertUnwindResumeCalls()) { + // FIXME: The reset of this function can go once the new EH is done. + LandingPads.clear(); + return true; + } + // Ensure that only unwind edges end at landing pads (a landing pad is a // basic block where an invoke unwind edge ends). Changed |= NormalizeLandingPads(); diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp index 3fb087c..660424c3 100644 --- a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp @@ -155,7 +155,7 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { CPSections.push_back(CstPool.SectionIdx); if (CPE.isMachineConstantPoolEntry()) - assert("CPE.isMachineConstantPoolEntry not supported yet"); + assert(0 && "CPE.isMachineConstantPoolEntry not supported yet"); // Emit the constant to constant pool section EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h index 2ec1f6e..8671c67 100644 --- a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h +++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h @@ -58,13 +58,13 @@ namespace llvm { /// emitLabel - Emits a label virtual void emitLabel(MCSymbol *Label) { - assert("emitLabel not implemented"); + assert(0 && "emitLabel not implemented"); } /// getLabelAddress - Return the address of the specified LabelID, /// only usable after the LabelID has been emitted. 
virtual uintptr_t getLabelAddress(MCSymbol *Label) const { - assert("getLabelAddress not implemented"); + assert(0 && "getLabelAddress not implemented"); return 0; } diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp index d977651..f2c2185 100644 --- a/contrib/llvm/lib/CodeGen/ELFWriter.cpp +++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp @@ -45,12 +45,12 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -65,7 +65,8 @@ char ELFWriter::ID = 0; ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))), + OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(), + &TM.getTargetLowering()->getObjFileLowering())), TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), @@ -482,7 +483,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { EmitGlobalConstantLargeInt(CI, GblS); return; } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { - const VectorType *PTy = CP->getType(); + VectorType *PTy = CP->getType(); for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) EmitGlobalConstant(CP->getOperand(I), GblS); return; @@ -540,8 +541,7 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { case Instruction::GetElementPtr: { const Constant *ptrVal = CE->getOperand(0); SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], - idxVec.size()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec); return std::make_pair(ptrVal, Offset); } case Instruction::IntToPtr: { @@ -552,7 +552,7 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { } case Instruction::PtrToInt: { Constant *Op = CE->getOperand(0); - const Type *Ty = CE->getType(); + Type *Ty = CE->getType(); // We can emit the pointer value into this slot if the slot is an // integer slot greater or equal to the size of the pointer. diff --git a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 13680c5..01dccdb 100644 --- a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -1,4 +1,4 @@ -//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// +//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,21 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file contains the SSEDomainFix pass. +// This file contains the execution dependency fix pass. // -// Some SSE instructions like mov, and, or, xor are available in different +// Some X86 SSE instructions like mov, and, or, xor are available in different // variants for different operand types. 
These variant instructions are // equivalent, but on Nehalem and newer cpus there is extra latency -// transferring data between integer and floating point domains. +// transferring data between integer and floating point domains. ARM cores +// have similar issues when they are configured with both VFP and NEON +// pipelines. // // This pass changes the variant instructions to minimize domain crossings. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sse-domain-fix" -#include "X86InstrInfo.h" +#define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" @@ -97,25 +101,27 @@ struct DomainValue { }; } -static const unsigned NumRegs = 16; - namespace { -class SSEDomainFixPass : public MachineFunctionPass { +class ExeDepsFix : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator<DomainValue> Allocator; SmallVector<DomainValue*,16> Avail; + const TargetRegisterClass *const RC; MachineFunction *MF; - const X86InstrInfo *TII; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineBasicBlock *MBB; + std::vector<int> AliasMap; + const unsigned NumRegs; DomainValue **LiveRegs; typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; LiveOutMap LiveOuts; unsigned Distance; public: - SSEDomainFixPass() : MachineFunctionPass(ID) {} + ExeDepsFix(const TargetRegisterClass *rc) + : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -150,17 +156,16 @@ private: }; } -char SSEDomainFixPass::ID = 0; +char ExeDepsFix::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. -int SSEDomainFixPass::RegIndex(unsigned reg) { - assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); - reg -= X86::XMM0; - return reg < NumRegs ? (int) reg : -1; +int ExeDepsFix::RegIndex(unsigned Reg) { + assert(Reg < AliasMap.size() && "Invalid register"); + return AliasMap[Reg]; } -DomainValue *SSEDomainFixPass::Alloc(int domain) { +DomainValue *ExeDepsFix::Alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); @@ -170,14 +175,14 @@ DomainValue *SSEDomainFixPass::Alloc(int domain) { return dv; } -void SSEDomainFixPass::Recycle(DomainValue *dv) { +void ExeDepsFix::Recycle(DomainValue *dv) { assert(dv && "Cannot recycle NULL"); dv->clear(); Avail.push_back(dv); } /// Set LiveRegs[rx] = dv, updating reference counts. -void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { +void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs) { LiveRegs = new DomainValue*[NumRegs]; @@ -195,7 +200,7 @@ void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { } // Kill register rx, recycle or collapse any DomainValue. -void SSEDomainFixPass::Kill(int rx) { +void ExeDepsFix::Kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs || !LiveRegs[rx]) return; @@ -208,7 +213,7 @@ void SSEDomainFixPass::Kill(int rx) { } /// Force register rx into domain. 
-void SSEDomainFixPass::Force(int rx, unsigned domain) { +void ExeDepsFix::Force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); DomainValue *dv; if (LiveRegs && (dv = LiveRegs[rx])) { @@ -217,8 +222,8 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) { else if (dv->hasDomain(domain)) Collapse(dv, domain); else { - // This is an incompatible open DomainValue. Collapse it to whatever and force - // the new value into domain. This costs a domain crossing. + // This is an incompatible open DomainValue. Collapse it to whatever and + // force the new value into domain. This costs a domain crossing. Collapse(dv, dv->getFirstDomain()); assert(LiveRegs[rx] && "Not live after collapse?"); LiveRegs[rx]->addDomain(domain); @@ -231,12 +236,12 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) { /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { +void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. while (!dv->Instrs.empty()) - TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain); + TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); dv->setSingleDomain(domain); // If there are multiple users, give them new, unique DomainValues. @@ -248,7 +253,7 @@ void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { /// Merge - All instructions and registers in B are moved to A, and B is /// released. -bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { +bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -266,7 +271,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { return true; } -void SSEDomainFixPass::enterBasicBlock() { +void ExeDepsFix::enterBasicBlock() { // Try to coalesce live-out registers from predecessors. for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { @@ -303,7 +308,7 @@ void SSEDomainFixPass::enterBasicBlock() { // A hard instruction only works in one domain. All input registers will be // forced into that domain. -void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { +void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { // Collapse all uses. for (unsigned i = mi->getDesc().getNumDefs(), e = mi->getDesc().getNumOperands(); i != e; ++i) { @@ -326,7 +331,7 @@ void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { } // A soft instruction can be changed to work in other domains given by mask. -void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { +void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Bitmask of available domains for this instruction after taking collapsed // operands into account. unsigned available = mask; @@ -362,7 +367,7 @@ void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { // If the collapsed operands force a single domain, propagate the collapse. 
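// (For example, if available == (1u << 2), exactly one domain is legal and
// CountTrailingZeros_32(available) recovers its number, 2.)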
if (isPowerOf2_32(available)) { unsigned domain = CountTrailingZeros_32(available); - TII->SetSSEDomain(mi, domain); + TII->setExecutionDomain(mi, domain); visitHardInstr(mi, domain); return; } @@ -431,8 +436,8 @@ void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { } } -void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any XMM registers redefined. +void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { + // Process explicit defs, kill any relevant registers redefined. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; @@ -442,25 +447,36 @@ void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { } } -bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { +bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); + TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); MBB = 0; LiveRegs = 0; Distance = 0; - assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); - // If no XMM registers are used in the function, we can skip it completely. + // If no relevant registers are used in the function, we can skip it + // completely. bool anyregs = false; - for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), - E = X86::VR128RegClass.end(); I != E; ++I) + for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); + I != E; ++I) if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } if (!anyregs) return false; + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, + // or -1. 
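// (Illustrative: when RC is the X86 VR128 class this gives
// AliasMap[X86::XMM3] == 3, maps any register overlapping XMM3, such as YMM3
// on AVX targets, to the same index, and leaves unrelated registers at -1.)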
+ AliasMap.resize(TRI->getNumRegs(), -1); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + AliasMap[*AI] = i; + } + MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*, 16> Visited; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > @@ -473,7 +489,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { MachineInstr *mi = I; if (mi->isDebugValue()) continue; ++Distance; - std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi); + std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi); if (domp.first) if (domp.second) visitSoftInstr(mi, domp.second); @@ -501,6 +517,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { return false; } -FunctionPass *llvm::createSSEDomainFixPass() { - return new SSEDomainFixPass(); +FunctionPass * +llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { + return new ExeDepsFix(RC); } diff --git a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 7871ba9..e2a14a8 100644 --- a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -1,4 +1,4 @@ -//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===// +//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file defines a MachineFunction pass which runs after register -// allocation that turns subreg insert/extract instructions into register -// copies, as needed. This ensures correct codegen even if the coalescer -// isn't able to remove all subreg instructions. +// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo +// instructions after register allocation. 
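// Roughly, the expansion performed here (illustrative, not quoted from the
// pass): a post-RA COPY such as '%physreg_a = COPY %physreg_b' is turned into
// a real target move through TargetInstrInfo::copyPhysReg(), and
// '%superreg = SUBREG_TO_REG 0, %src, idx' becomes a plain copy of %src into
// the idx sub-register of %superreg.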
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lowersubregs" +#define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -29,52 +27,51 @@ using namespace llvm; namespace { - struct LowerSubregsInstructionPass : public MachineFunctionPass { - private: - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; +struct ExpandPostRA : public MachineFunctionPass { +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; - public: - static char ID; // Pass identification, replacement for typeid - LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} +public: + static char ID; // Pass identification, replacement for typeid + ExpandPostRA() : MachineFunctionPass(ID) {} - const char *getPassName() const { - return "Subregister lowering instruction pass"; - } + const char *getPassName() const { + return "Post-RA pseudo instruction expansion pass"; + } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } - /// runOnMachineFunction - pass entry point - bool runOnMachineFunction(MachineFunction&); + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); - private: - bool LowerSubregToReg(MachineInstr *MI); - bool LowerCopy(MachineInstr *MI); +private: + bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); - void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, - const TargetRegisterInfo *TRI); - void TransferImplicitDefs(MachineInstr *MI); - }; + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI); + void TransferImplicitDefs(MachineInstr *MI); +}; +} // end anonymous namespace - char LowerSubregsInstructionPass::ID = 0; -} +char ExpandPostRA::ID = 0; -FunctionPass *llvm::createLowerSubregsPass() { - return new LowerSubregsInstructionPass(); +FunctionPass *llvm::createExpandPostRAPseudosPass() { + return new ExpandPostRA(); } /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, /// and the lowered replacement instructions immediately precede it. /// Mark the replacement instructions with the dead flag. void -LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, - unsigned DstReg, - const TargetRegisterInfo *TRI) { +ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI) { for (MachineBasicBlock::iterator MII = prior(MachineBasicBlock::iterator(MI)); ; --MII) { if (MII->addRegisterDead(DstReg, TRI)) @@ -88,7 +85,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. 
void -LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) { +ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) { MachineBasicBlock::iterator CopyMI = MI; --CopyMI; @@ -100,7 +97,7 @@ LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) { } } -bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { +bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { MachineBasicBlock *MBB = MI->getParent(); assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && MI->getOperand(1).isImm() && @@ -152,7 +149,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { return true; } -bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { +bool ExpandPostRA::LowerCopy(MachineInstr *MI) { MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); @@ -191,9 +188,9 @@ bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { /// runOnMachineFunction - Reduce subregister inserts and extracts to register /// copies. /// -bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { +bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" - << "********** LOWERING SUBREG INSTRS **********\n" + << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" << "********** Function: " << MF.getFunction()->getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); @@ -205,17 +202,34 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mbbi != mbbe; ++mbbi) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me;) { - MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; - assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear"); - assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - "EXTRACT_SUBREG should no longer appear"); - if (MI->isSubregToReg()) { + // Advance iterator here because MI may be erased. + ++mi; + + // Only expand pseudos. + if (!MI->getDesc().isPseudo()) + continue; + + // Give targets a chance to expand even standard pseudos. + if (TII->expandPostRAPseudo(MI)) { + MadeChange = true; + continue; + } + + // Expand standard pseudos. 
+ switch (MI->getOpcode()) { + case TargetOpcode::SUBREG_TO_REG: MadeChange |= LowerSubregToReg(MI); - } else if (MI->isCopy()) { + break; + case TargetOpcode::COPY: MadeChange |= LowerCopy(MI); + break; + case TargetOpcode::DBG_VALUE: + continue; + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::EXTRACT_SUBREG: + llvm_unreachable("Sub-register pseudos should have been eliminated."); } - mi = nmi; } } diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 6cb2277..ce7ed29 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -16,14 +16,13 @@ #include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -154,7 +153,8 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; - const MachineLoopInfo *MLI; + const MachineBranchProbabilityInfo *MBPI; + bool MadeChange; int FnNum; public: @@ -162,9 +162,9 @@ namespace { IfConverter() : MachineFunctionPass(ID), FnNum(-1) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -252,7 +252,7 @@ namespace { } INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } @@ -261,7 +261,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); - MLI = &getAnalysis<MachineLoopInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; @@ -790,28 +790,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool TNeedSub = TrueBBI.Predicate.size() > 0; bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; - - // Try to predict the branch, using loop info to guide us. 
- // General heuristics are: - // - backedge -> 90% taken - // - early exit -> 20% taken - // - branch predictor confidence -> 90% - BranchProbability Prediction(5, 10); - MachineLoop *Loop = MLI->getLoopFor(BB); - if (Loop) { - if (TrueBBI.BB == Loop->getHeader()) - Prediction = BranchProbability(9, 10); - else if (FalseBBI.BB == Loop->getHeader()) - Prediction = BranchProbability(1, 10); - - MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); - MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); - if (!TrueLoop || TrueLoop->getParentLoop() == Loop) - Prediction = BranchProbability(2, 10); - else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) - Prediction = BranchProbability(8, 10); - } - + + BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 5547f73..726af46 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -17,6 +17,7 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -27,22 +28,26 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); -STATISTIC(NumSnippets, "Number of snippets included in spills"); +STATISTIC(NumSnippets, "Number of spilled snippets"); STATISTIC(NumSpills, "Number of spills inserted"); +STATISTIC(NumSpillsRemoved, "Number of spills removed"); STATISTIC(NumReloads, "Number of reloads inserted"); +STATISTIC(NumReloadsRemoved, "Number of reloads removed"); STATISTIC(NumFolded, "Number of folded stack accesses"); STATISTIC(NumFoldedLoads, "Number of folded loads"); STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); -STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads"); -STATISTIC(NumHoistLocal, "Number of locally hoisted spills"); -STATISTIC(NumHoistGlobal, "Number of globally hoisted spills"); -STATISTIC(NumRedundantSpills, "Number of redundant spills identified"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads"); +STATISTIC(NumHoists, "Number of hoisted spills"); + +static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, + cl::desc("Disable inline spill hoisting")); namespace { class InlineSpiller : public Spiller { @@ -75,26 +80,49 @@ class InlineSpiller : public Spiller { // Values that failed to remat at some point. SmallPtrSet<VNInfo*, 8> UsedValues; +public: // Information about a value that was defined by a copy from a sibling // register. struct SibValueInfo { // True when all reaching defs were reloads: No spill is necessary. bool AllDefsAreReloads; + // True when value is defined by an original PHI not from splitting. + bool DefByOrigPHI; + + // True when the COPY defining this value killed its source. + bool KillsSource; + // The preferred register to spill. unsigned SpillReg; // The value of SpillReg that should be spilled. 
VNInfo *SpillVNI; + // The block where SpillVNI should be spilled. Currently, this must be the + // block containing SpillVNI->def. + MachineBasicBlock *SpillMBB; + // A defining instruction that is not a sibling copy or a reload, or NULL. // This can be used as a template for rematerialization. MachineInstr *DefMI; + // List of values that depend on this one. These values are actually the + // same, but live range splitting has placed them in different registers, + // or SSA update needed to insert PHI-defs to preserve SSA form. This is + // copies of the current value and phi-kills. Usually only phi-kills cause + // more than one dependent value. + TinyPtrVector<VNInfo*> Deps; + SibValueInfo(unsigned Reg, VNInfo *VNI) - : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {} + : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), + SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {} + + // Returns true when a def has been found. + bool hasDef() const { return DefByOrigPHI || DefMI; } }; +private: // Values in RegsToSpill defined by sibling copies. typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; SibValueMap SibValues; @@ -134,6 +162,7 @@ private: bool isSibling(unsigned Reg); MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0); void analyzeSiblingValues(); bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); @@ -282,6 +311,156 @@ bool InlineSpiller::isSibling(unsigned Reg) { VRM.getOriginal(Reg) == Original; } +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const InlineSpiller::SibValueInfo &SVI) { + OS << "spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def; + if (SVI.SpillMBB) + OS << " in BB#" << SVI.SpillMBB->getNumber(); + if (SVI.AllDefsAreReloads) + OS << " all-reloads"; + if (SVI.DefByOrigPHI) + OS << " orig-phi"; + if (SVI.KillsSource) + OS << " kill"; + OS << " deps["; + for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i) + OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def; + OS << " ]"; + if (SVI.DefMI) + OS << " def: " << *SVI.DefMI; + else + OS << '\n'; + return OS; +} +#endif + +/// propagateSiblingValue - Propagate the value in SVI to dependents if it is +/// known. Otherwise remember the dependency for later. +/// +/// @param SVI SibValues entry to propagate. +/// @param VNI Dependent value, or NULL to propagate to all saved dependents. +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, + VNInfo *VNI) { + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. + TinyPtrVector<VNInfo*> FirstDeps; + if (VNI) { + FirstDeps.push_back(VNI); + SVI->second.Deps.push_back(VNI); + } + + // Has the value been completely determined yet? If not, defer propagation. + if (!SVI->second.hasDef()) + return; + + // Work list of values to propagate. It would be nice to use a SetVector + // here, but then we would be forced to use a SmallSet. + SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI); + SmallPtrSet<VNInfo*, 8> WorkSet; + + do { + SVI = WorkList.pop_back_val(); + WorkSet.erase(SVI->first); + TinyPtrVector<VNInfo*> *Deps = VNI ? 
&FirstDeps : &SVI->second.Deps; + VNI = 0; + + SibValueInfo &SV = SVI->second; + if (!SV.SpillMBB) + SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); + + DEBUG(dbgs() << " prop to " << Deps->size() << ": " + << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); + + assert(SV.hasDef() && "Propagating undefined value"); + + // Should this value be propagated as a preferred spill candidate? We don't + // propagate values of registers that are about to spill. + bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); + unsigned SpillDepth = ~0u; + + for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(), + DepE = Deps->end(); DepI != DepE; ++DepI) { + SibValueMap::iterator DepSVI = SibValues.find(*DepI); + assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); + SibValueInfo &DepSV = DepSVI->second; + if (!DepSV.SpillMBB) + DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); + + bool Changed = false; + + // Propagate defining instruction. + if (!DepSV.hasDef()) { + Changed = true; + DepSV.DefMI = SV.DefMI; + DepSV.DefByOrigPHI = SV.DefByOrigPHI; + } + + // Propagate AllDefsAreReloads. For PHI values, this computes an AND of + // all predecessors. + if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { + Changed = true; + DepSV.AllDefsAreReloads = false; + } + + // Propagate best spill value. + if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { + if (SV.SpillMBB == DepSV.SpillMBB) { + // DepSV is in the same block. Hoist when dominated. + if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x<kill> + // + // This hoist only helps when the DepSV copy kills its source. + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } else { + // DepSV is in a different block. + if (SpillDepth == ~0u) + SpillDepth = Loops.getLoopDepth(SV.SpillMBB); + + // Also hoist spills to blocks with smaller loop depth, but make sure + // that the new value dominates. Non-phi dependents are always + // dominated, phis need checking. + if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && + (!DepSVI->first->isPHIDef() || + MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } + } + + if (!Changed) + continue; + + // Something changed in DepSVI. Propagate to dependents. + if (WorkSet.insert(DepSVI->first)) + WorkList.push_back(DepSVI); + + DEBUG(dbgs() << " update " << DepSVI->first->id << '@' + << DepSVI->first->def << " to:\t" << DepSV); + } + } while (!WorkList.empty()); +} + /// traceSiblingValue - Trace a value that is about to be spilled back to the /// real defining instructions by looking through sibling copies. Always stay /// within the range of OrigVNI so the registers are known to carry the same @@ -294,84 +473,101 @@ bool InlineSpiller::isSibling(unsigned Reg) { /// MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, VNInfo *OrigVNI) { + // Check if a cached value already exists. 
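// (The cache is keyed by the VNInfo being traced; a hit returns the
// previously computed defining instruction immediately instead of re-walking
// the sibling copies.)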
+ SibValueMap::iterator SVI; + bool Inserted; + tie(SVI, Inserted) = + SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI))); + if (!Inserted) { + DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second); + return SVI->second.DefMI; + } + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' << UseVNI->id << '@' << UseVNI->def << '\n'); - SmallPtrSet<VNInfo*, 8> Visited; + + // List of (Reg, VNI) that have been inserted into SibValues, but need to be + // processed. SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; WorkList.push_back(std::make_pair(UseReg, UseVNI)); - // Best spill candidate seen so far. This must dominate UseVNI. - SibValueInfo SVI(UseReg, UseVNI); - MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); - MachineBasicBlock *SpillMBB = UseMBB; - unsigned SpillDepth = Loops.getLoopDepth(SpillMBB); - bool SeenOrigPHI = false; // Original PHI met. - do { unsigned Reg; VNInfo *VNI; tie(Reg, VNI) = WorkList.pop_back_val(); - if (!Visited.insert(VNI)) - continue; + DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def + << ":\t"); - // Is this value a better spill candidate? - if (!isRegToSpill(Reg)) { - MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - if (MBB == SpillMBB) { - // This is an alternative def earlier in the same MBB. - // Hoist the spill as far as possible in SpillMBB. This can ease - // register pressure: - // - // x = def - // y = use x - // s = copy x - // - // Hoisting the spill of s to immediately after the def removes the - // interference between x and y: - // - // x = def - // spill x - // y = use x<kill> - // - if (VNI->def < SVI.SpillVNI->def) { - DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": " - << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def - << '\n'); - SVI.SpillReg = Reg; - SVI.SpillVNI = VNI; - } - } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { - // This is a valid spill location dominating UseVNI. - // Prefer to spill at a smaller loop depth. - unsigned Depth = Loops.getLoopDepth(MBB); - if (Depth < SpillDepth) { - DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg) - << ':' << VNI->id << '@' << VNI->def << '\n'); - SVI.SpillReg = Reg; - SVI.SpillVNI = VNI; - SpillMBB = MBB; - SpillDepth = Depth; - } - } - } + // First check if this value has already been computed. + SVI = SibValues.find(VNI); + assert(SVI != SibValues.end() && "Missing SibValues entry"); // Trace through PHI-defs created by live range splitting. if (VNI->isPHIDef()) { + // Stop at original PHIs. We don't know the value at the predecessors. if (VNI->def == OrigVNI->def) { - DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':' - << VNI->id << '@' << VNI->def << '\n'); - SeenOrigPHI = true; + DEBUG(dbgs() << "orig phi value\n"); + SVI->second.DefByOrigPHI = true; + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); continue; } - // Get values live-out of predecessors. + + // This is a PHI inserted by live range splitting. We could trace the + // live-out value from predecessor blocks, but that search can be very + // expensive if there are many predecessors and many more PHIs as + // generated by tail-dup when it sees an indirectbr. Instead, look at + // all the non-PHI defs that have the same value as OrigVNI. They must + // jointly dominate VNI->def. 
This is not optimal since VNI may actually + be jointly dominated by a smaller subset of defs, so there is a chance + we will miss an AllDefsAreReloads optimization. + + // Separate all values dominated by OrigVNI into PHIs and non-PHIs. + SmallVector<VNInfo*, 8> PHIs, NonPHIs; LiveInterval &LI = LIS.getInterval(Reg); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); - if (PVNI) - WorkList.push_back(std::make_pair(Reg, PVNI)); + LiveInterval &OrigLI = LIS.getInterval(Original); + + for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); + VI != VE; ++VI) { + VNInfo *VNI2 = *VI; + if (VNI2->isUnused()) + continue; + if (!OrigLI.containsOneValue() && + OrigLI.getVNInfoAt(VNI2->def) != OrigVNI) + continue; + if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def) + PHIs.push_back(VNI2); + else + NonPHIs.push_back(VNI2); + } + DEBUG(dbgs() << "split phi value, checking " << PHIs.size() + << " phi-defs, and " << NonPHIs.size() + << " non-phi/orig defs\n"); + + // Create entries for all the PHIs. Don't add them to the worklist, we + // are processing all of them in one go here. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i]))); + + // Add every PHI as a dependent of all the non-PHIs. + for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) { + VNInfo *NonPHI = NonPHIs[i]; + // Known value? Try an insertion. + tie(SVI, Inserted) = + SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); + // Add all the PHIs as dependents of NonPHI. + for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) + SVI->second.Deps.push_back(PHIs[pi]); + // This is the first time we see NonPHI, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(Reg, NonPHI)); + else + // Propagate to all inserted PHIs, not just VNI. + propagateSiblingValue(SVI); } + + // Next work list item. continue; } @@ -382,48 +578,49 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex()); - assert(SrcVNI && "Copy from non-existing value"); - DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':' - << SrcVNI->id << '@' << SrcVNI->def << '\n'); - WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + assert(SrcLR && "Copy from non-existing value"); + // Check if this COPY kills its source. + SVI->second.KillsSource = (SrcLR->end == VNI->def); + VNInfo *SrcVNI = SrcLR->valno; + DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def + << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); + // Known sibling source value? Try an insertion. + tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI, + SibValueInfo(SrcReg, SrcVNI))); + // This is the first time we see Src, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + propagateSiblingValue(SVI, VNI); + // Next work list item. continue; } } // Track reachable reloads.
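// Illustrative sketch (editor's addition, not part of the patch): the caching
// idiom used throughout traceSiblingValue. A single map insertion both looks
// up and creates an entry, and only entries that were actually inserted go on
// the work list, so each value is traced once and later queries hit the cache.
// Stand-alone version over an integer graph; all types and names here are
// invented for the example.
#include <map>
#include <vector>

static void traceReachable(int Start, const std::multimap<int, int> &Edges,
                           std::map<int, int> &ReachedFrom) {
  std::vector<int> Work;
  // First time we see Start? Then it needs processing.
  if (ReachedFrom.insert(std::make_pair(Start, Start)).second)
    Work.push_back(Start);
  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    typedef std::multimap<int, int>::const_iterator EdgeIter;
    std::pair<EdgeIter, EdgeIter> R = Edges.equal_range(N);
    for (EdgeIter I = R.first; I != R.second; ++I) {
      // Known node? Try an insertion; only new nodes join the work list.
      if (ReachedFrom.insert(std::make_pair(I->second, N)).second)
        Work.push_back(I->second);
    }
  }
}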
+ SVI->second.DefMI = MI; + SVI->second.SpillMBB = MI->getParent(); int FI; if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { - DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':' - << VNI->id << "@" << VNI->def << '\n'); - SVI.AllDefsAreReloads = true; + DEBUG(dbgs() << "reload\n"); + propagateSiblingValue(SVI); + // Next work list item. continue; } - // We have an 'original' def. Don't record trivial cases. - if (VNI == UseVNI) { - DEBUG(dbgs() << "Not a sibling copy.\n"); - return MI; - } - // Potential remat candidate. - DEBUG(dbgs() << " def " << PrintReg(Reg) << ':' - << VNI->id << '@' << VNI->def << '\t' << *MI); - SVI.DefMI = MI; + DEBUG(dbgs() << "def " << *MI); + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); } while (!WorkList.empty()); - if (SeenOrigPHI || SVI.DefMI) - SVI.AllDefsAreReloads = false; - - DEBUG({ - if (SVI.AllDefsAreReloads) - dbgs() << "All defs are reloads.\n"; - else - dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':' - << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n'; - }); - SibValues.insert(std::make_pair(UseVNI, SVI)); - return SVI.DefMI; + // Look up the value we were looking for. We already did this lookup at the + // top of the function, but SibValues may have been invalidated. + SVI = SibValues.find(UseVNI); + assert(SVI != SibValues.end() && "Didn't compute requested info"); + DEBUG(dbgs() << " traced to:\t" << SVI->second); + return SVI->second.DefMI; } /// analyzeSiblingValues - Trace values defined by sibling copies back to @@ -506,6 +703,7 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { // Already spilled everywhere. if (SVI.AllDefsAreReloads) { + DEBUG(dbgs() << "\tno spill needed: " << SVI); ++NumOmitReloadSpill; return true; } @@ -531,10 +729,8 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); - if (MBB == CopyMI->getParent()) - ++NumHoistLocal; - else - ++NumHoistGlobal; + ++NumSpills; + ++NumHoists; return true; } @@ -589,7 +785,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // eliminateDeadDefs won't normally remove stores, so switch opcode. MI->setDesc(TII.get(TargetOpcode::KILL)); DeadDefs.push_back(MI); - ++NumRedundantSpills; + ++NumSpillsRemoved; + --NumSpills; } } } while (!WorkList.empty()); @@ -637,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); - VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx); + VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); @@ -787,10 +984,10 @@ void InlineSpiller::reMaterializeAll() { /// If MI is a load or store of StackSlot, it can be removed. bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { int FI = 0; - unsigned InstrReg; - if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) && - !(InstrReg = TII.isStoreToStackSlot(MI, FI))) - return false; + unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI); + bool IsLoad = InstrReg; + if (!IsLoad) + InstrReg = TII.isStoreToStackSlot(MI, FI); // We have a stack access. Is it the right register and slot?
if (InstrReg != Reg || FI != StackSlot) @@ -799,6 +996,15 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { DEBUG(dbgs() << "Coalescing stack access: " << *MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); + + if (IsLoad) { + ++NumReloadsRemoved; + --NumReloads; + } else { + ++NumSpillsRemoved; + --NumSpills; + } + return true; } @@ -810,6 +1016,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) { + bool WasCopy = MI->isCopy(); // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -839,7 +1046,12 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); - ++NumFolded; + if (!WasCopy) + ++NumFolded; + else if (Ops.front() == 0) + ++NumSpills; + else + ++NumReloads; return true; } @@ -975,8 +1187,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. - if (Writes && hasLiveDef) + if (Writes) { + if (hasLiveDef) insertSpill(NewLI, OldLI, Idx, MI); + else { + // This instruction defines a dead value. We don't need to spill it, + // but do create a live range for the dead value. + VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); + } + } DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index a09bb39..29b47bd 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -18,10 +18,13 @@ using namespace llvm; +// Static member used for null interference cursors. +InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; + void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, - const TargetRegisterInfo *tri) { + const TargetRegisterInfo *tri) { MF = mf; LIUArray = liuarray; TRI = tri; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index 7f0a27a..4df0a9e 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -138,6 +138,7 @@ public: class Cursor { Entry *CacheEntry; BlockInterference *Current; + static BlockInterference NoInterference; void setEntry(Entry *E) { Current = 0; @@ -175,7 +176,7 @@ public: /// moveTo - Move cursor to basic block MBBNum. void moveToBlock(unsigned MBBNum) { - Current = CacheEntry->get(MBBNum); + Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference; } /// hasInterference - Return true if the current block has any interference. diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 611886f..0f92c2d 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -27,7 +27,7 @@ using namespace llvm; template <class ArgIt> static void EnsureFunctionExists(Module &M, const char *Name, ArgIt ArgBegin, ArgIt ArgEnd, - const Type *RetTy) { + Type *RetTy) { // Insert a correctly-typed definition now. 
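// Illustrative sketch (editor's addition, not part of the patch): the static
// NoInterference member introduced for InterferenceCache::Cursor above is a
// null-object sentinel. A cursor without a cache entry points at a shared
// empty block instead of at null, so callers can query it without a null
// check on every access. Simplified stand-alone version; the class and
// member names are invented for the example.
struct BlockData {
  unsigned First, Last;
  BlockData() : First(0), Last(0) {}
  bool empty() const { return First == Last; }
};

class Cursor {
  const BlockData *Current;
  static BlockData Nothing;             // shared "no interference" object
public:
  Cursor() : Current(&Nothing) {}
  void moveTo(const BlockData *BD) { Current = BD ? BD : &Nothing; }
  bool hasInterference() const { return !Current->empty(); }
};

BlockData Cursor::Nothing;              // single definition of the sentinel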
std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) @@ -64,7 +64,7 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, template <class ArgIt> static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, ArgIt ArgBegin, ArgIt ArgEnd, - const Type *RetTy) { + Type *RetTy) { // If we haven't already looked up this function, check to see if the // program already contains a function with this name. Module *M = CI->getParent()->getParent()->getParent(); @@ -462,7 +462,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - const IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - const IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -484,7 +484,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - const IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index f985af8..80ecc22 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -27,16 +27,18 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace llvm { @@ -55,8 +57,12 @@ static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, cl::desc("Disable code placement")); static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); +static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -103,20 +109,17 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS) + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : TargetMachine(T, Triple, CPU, FS) { + CodeGenInfo = 
T.createMCCodeGenInfo(Triple, RM, CM); AsmInfo = T.createMCAsmInfo(Triple); -} - -// Set the default code model for the JIT for a generic target. -// FIXME: Is small right here? or .is64Bit() ? Large : Small? -void LLVMTargetMachine::setCodeModelForJIT() { - setCodeModel(CodeModel::Small); -} - -// Set the default code model for static compilation for a generic target. -void LLVMTargetMachine::setCodeModelForStatic() { - setCodeModel(CodeModel::Small); + // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, + // and if the old one gets included then MCAsmInfo will be NULL and + // we'll crash later. + // Provide the user with a useful error message about what's wrong. + assert(AsmInfo && "MCAsmInfo not initialized." + "Make sure you include the correct TargetSelect.h!"); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -134,21 +137,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, Context->setAllowTemporaryLabels(false); const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; switch (FileType) { default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; - TargetAsmBackend *TAB = 0; + MCAsmBackend *MAB = 0; if (ShowMCEncoding) { const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context); - TAB = getTarget().createAsmBackend(getTargetTriple()); + MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context); + MAB = getTarget().createMCAsmBackend(getTargetTriple()); } MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, @@ -156,7 +160,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, hasMCUseLoc(), hasMCUseCFI(), InstPrinter, - MCE, TAB, + MCE, MAB, ShowMCInst); AsmStreamer.reset(S); break; @@ -164,17 +168,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, - *Context); - TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); - if (MCE == 0 || TAB == 0) + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, + *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + if (MCE == 0 || MAB == 0) return true; - AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), - *Context, *TAB, Out, MCE, - hasMCRelaxAll(), - hasMCNoExecStack())); + AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), + *Context, *MAB, Out, + MCE, hasMCRelaxAll(), + hasMCNoExecStack())); AsmStreamer.get()->InitSections(); break; } @@ -198,8 +201,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(Printer); - // Make sure the code model is set. - setCodeModelForStatic(); PM.add(createGCInfoDeleter()); return false; } @@ -214,9 +215,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, CodeGenOpt::Level OptLevel, bool DisableVerify) { - // Make sure the code model is set. 
- setCodeModelForJIT(); - // Add common CodeGen passes. MCContext *Ctx = 0; if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) @@ -248,16 +246,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx); - TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); - if (MCE == 0 || TAB == 0) + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + if (MCE == 0 || MAB == 0) return true; OwningPtr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx, - *TAB, Out, MCE, - hasMCRelaxAll(), - hasMCNoExecStack())); + AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx, + *MAB, Out, MCE, + hasMCRelaxAll(), + hasMCNoExecStack())); AsmStreamer.get()->InitSections(); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. @@ -270,9 +268,6 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, PM.add(Printer); - // Make sure the code model is set. - setCodeModelForJIT(); - return false; // success! } @@ -369,8 +364,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. - TargetAsmInfo *TAI = new TargetAsmInfo(*this); - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI); + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), + *getRegisterInfo(), + &getTargetLowering()->getObjFileLowering()); PM.add(MMI); OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. @@ -412,12 +408,14 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - PM.add(createDeadMachineInstructionElimPass()); + if (!DisableMachineDCE) + PM.add(createDeadMachineInstructionElimPass()); printAndVerify(PM, "After codegen DCE pass"); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); - PM.add(createMachineCSEPass()); + if (!DisableMachineCSE) + PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); @@ -452,8 +450,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (addPostRegAlloc(PM, OptLevel)) printAndVerify(PM, "After PostRegAlloc passes"); - PM.add(createLowerSubregsPass()); - printAndVerify(PM, "After LowerSubregs"); + PM.add(createExpandPostRAPseudosPass()); + printAndVerify(PM, "After ExpandPostRAPseudos"); // Insert prolog/epilog code. Eliminate abstract frame index references... 
PM.add(createPrologEpilogCodeInserter()); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp new file mode 100644 index 0000000..a12e1a3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -0,0 +1,335 @@ +//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LexicalScopes analysis. +// +// This pass collects lexical scope information and maps machine instructions +// to respective lexical scopes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lexicalscopes" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/Function.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +LexicalScopes::~LexicalScopes() { + releaseMemory(); } + +/// releaseMemory - release memory. +void LexicalScopes::releaseMemory() { + MF = NULL; + CurrentFnLexicalScope = NULL; + DeleteContainerSeconds(LexicalScopeMap); + DeleteContainerSeconds(AbstractScopeMap); + InlinedLexicalScopeMap.clear(); + AbstractScopesList.clear(); +} + +/// initialize - Scan machine function and construct lexical scope nest. +void LexicalScopes::initialize(const MachineFunction &Fn) { + releaseMemory(); + MF = &Fn; + SmallVector<InsnRange, 4> MIRanges; + DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; + extractLexicalScopes(MIRanges, MI2ScopeMap); + if (CurrentFnLexicalScope) { + constructScopeNest(CurrentFnLexicalScope); + assignInstructionRanges(MIRanges, MI2ScopeMap); + } +} + +/// extractLexicalScopes - Extract instruction ranges for each lexical scope +/// for the given machine function. +void LexicalScopes:: +extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { + + // Scan each instruction and create scopes. First build working set of scopes. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + const MachineInstr *RangeBeginMI = NULL; + const MachineInstr *PrevMI = NULL; + DebugLoc PrevDL; + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + + // Check if instruction has valid location information. + const DebugLoc MIDL = MInsn->getDebugLoc(); + if (MIDL.isUnknown()) { + PrevMI = MInsn; + continue; + } + + // If scope has not changed then skip this instruction. + if (MIDL == PrevDL) { + PrevMI = MInsn; + continue; + } + + // Ignore DBG_VALUE. It does not contribute to any instruction in output. + if (MInsn->isDebugValue()) + continue; + + if (RangeBeginMI) { + // If we have already seen a beginning of an instruction range and + // current instruction scope does not match scope of first instruction + // in this range then create a new instruction range. + InsnRange R(RangeBeginMI, PrevMI); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + MIRanges.push_back(R); + } + + // This is a beginning of a new instruction range. + RangeBeginMI = MInsn; + + // Reset previous markers.
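// Illustrative sketch (editor's addition, not part of the patch): the
// instruction scan in extractLexicalScopes groups consecutive instructions
// that share a DebugLoc into [first, last] ranges, closing the current range
// whenever the scope changes. The same shape with plain integers standing in
// for DebugLoc values and instruction positions (0 meaning "no location");
// all names are invented for the example.
#include <utility>
#include <vector>

static void buildRanges(const std::vector<int> &Locs,
                        std::vector<std::pair<int, int> > &Ranges) {
  int Begin = -1, Prev = -1, PrevLoc = 0;
  for (int i = 0, e = (int)Locs.size(); i != e; ++i) {
    if (Locs[i] == 0 || Locs[i] == PrevLoc) {
      Prev = i;                          // same scope (or none): extend range
      continue;
    }
    if (Begin >= 0)
      Ranges.push_back(std::make_pair(Begin, Prev));  // scope changed: close
    Begin = Prev = i;
    PrevLoc = Locs[i];
  }
  if (Begin >= 0 && PrevLoc != 0)
    Ranges.push_back(std::make_pair(Begin, Prev));    // close the last range
}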
+ PrevMI = MInsn; + PrevDL = MIDL; + } + + // Create last instruction range. + if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + InsnRange R(RangeBeginMI, PrevMI); + MIRanges.push_back(R); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + } + } +} + +/// findLexicalScope - Find lexical scope, either regular or inlined, for the +/// given DebugLoc. Return NULL if not found. +LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { + MDNode *Scope = NULL; + MDNode *IA = NULL; + DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); + if (!Scope) return NULL; + + // The scope that we were created with could have an extra file - which + // isn't what we care about in this case. + DIDescriptor D = DIDescriptor(Scope); + if (D.isLexicalBlockFile()) + Scope = DILexicalBlockFile(Scope).getScope(); + + if (IA) + return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); + return LexicalScopeMap.lookup(Scope); +} + +/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If +/// not available then create new lexical scope. +LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); + + if (InlinedAt) { + // Create an abstract scope for inlined function. + getOrCreateAbstractScope(Scope); + // Create an inlined scope for inlined function. + return getOrCreateInlinedScope(Scope, InlinedAt); + } + + return getOrCreateRegularScope(Scope); +} + +/// getOrCreateRegularScope - Find or create a regular lexical scope. +LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { + DIDescriptor D = DIDescriptor(Scope); + if (D.isLexicalBlockFile()) { + Scope = DILexicalBlockFile(Scope).getScope(); + D = DIDescriptor(Scope); + } + + LexicalScope *WScope = LexicalScopeMap.lookup(Scope); + if (WScope) + return WScope; + + LexicalScope *Parent = NULL; + if (D.isLexicalBlock()) + Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); + WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false); + LexicalScopeMap.insert(std::make_pair(Scope, WScope)); + if (!Parent && DIDescriptor(Scope).isSubprogram() + && DISubprogram(Scope).describes(MF->getFunction())) + CurrentFnLexicalScope = WScope; + + return WScope; +} + +/// getOrCreateInlinedScope - Find or create an inlined lexical scope. +LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, + MDNode *InlinedAt) { + LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt); + if (InlinedScope) + return InlinedScope; + + DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt); + InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc), + DIDescriptor(Scope), InlinedAt, false); + InlinedLexicalScopeMap[InlinedLoc] = InlinedScope; + LexicalScopeMap[InlinedAt] = InlinedScope; + return InlinedScope; +} + +/// getOrCreateAbstractScope - Find or create an abstract lexical scope. 
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { + assert(N && "Invalid Scope encoding!"); + + DIDescriptor Scope(N); + if (Scope.isLexicalBlockFile()) + Scope = DILexicalBlockFile(Scope).getScope(); + LexicalScope *AScope = AbstractScopeMap.lookup(N); + if (AScope) + return AScope; + + LexicalScope *Parent = NULL; + if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + Parent = getOrCreateAbstractScope(ParentDesc); + } + AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true); + AbstractScopeMap[N] = AScope; + if (DIDescriptor(N).isSubprogram()) + AbstractScopesList.push_back(AScope); + return AScope; +} + +/// constructScopeNest +void LexicalScopes::constructScopeNest(LexicalScope *Scope) { + assert (Scope && "Unable to calculate scope dominance graph!"); + SmallVector<LexicalScope *, 4> WorkStack; + WorkStack.push_back(Scope); + unsigned Counter = 0; + while (!WorkStack.empty()) { + LexicalScope *WS = WorkStack.back(); + const SmallVector<LexicalScope *, 4> &Children = WS->getChildren(); + bool visitedChildren = false; + for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + SE = Children.end(); SI != SE; ++SI) { + LexicalScope *ChildScope = *SI; + if (!ChildScope->getDFSOut()) { + WorkStack.push_back(ChildScope); + visitedChildren = true; + ChildScope->setDFSIn(++Counter); + break; + } + } + if (!visitedChildren) { + WorkStack.pop_back(); + WS->setDFSOut(++Counter); + } + } +} + +/// assignInstructionRanges - Find ranges of instructions covered by each +/// lexical scope. +void LexicalScopes:: +assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) +{ + + LexicalScope *PrevLexicalScope = NULL; + for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), + RE = MIRanges.end(); RI != RE; ++RI) { + const InsnRange &R = *RI; + LexicalScope *S = MI2ScopeMap.lookup(R.first); + assert (S && "Lost LexicalScope for a machine instruction!"); + if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) + PrevLexicalScope->closeInsnRange(S); + S->openInsnRange(R.first); + S->extendInsnRange(R.second); + PrevLexicalScope = S; + } + + if (PrevLexicalScope) + PrevLexicalScope->closeInsnRange(); +} + +/// getMachineBasicBlocks - Populate given set using machine basic blocks which +/// have machine instructions that belong to lexical scope identified by +/// DebugLoc. +void LexicalScopes:: +getMachineBasicBlocks(DebugLoc DL, + SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) { + MBBs.clear(); + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return; + + if (Scope == CurrentFnLexicalScope) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) + MBBs.insert(I); + return; + } + + SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); + for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + E = InsnRanges.end(); I != E; ++I) { + InsnRange &R = *I; + MBBs.insert(R.first->getParent()); + } +} + +/// dominates - Return true if DebugLoc's lexical scope dominates at least one +/// machine instruction's lexical scope in a given machine basic block. +bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return false; + + // Current function scope covers all basic blocks in the function.
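// Illustrative sketch (editor's addition, not part of the patch):
// constructScopeNest above assigns DFSIn/DFSOut numbers with an explicit
// stack so that a later "does scope A dominate scope B" query becomes a
// simple interval test on the two numbers. Stand-alone version over a plain
// tree; the Node type and function names are invented for the example.
#include <vector>

struct Node {
  std::vector<Node*> Kids;
  unsigned In, Out;
  Node() : In(0), Out(0) {}
};

static void numberScopes(Node *Root) {
  std::vector<Node*> Stack(1, Root);
  unsigned Counter = 0;
  while (!Stack.empty()) {
    Node *N = Stack.back();
    bool Descended = false;
    for (unsigned i = 0, e = N->Kids.size(); i != e; ++i)
      if (!N->Kids[i]->Out) {            // unfinished child: descend into it
        N->Kids[i]->In = ++Counter;
        Stack.push_back(N->Kids[i]);
        Descended = true;
        break;
      }
    if (!Descended) {                    // all children finished: close N
      Stack.pop_back();
      N->Out = ++Counter;
    }
  }
}

// A scope encloses (dominates) another iff its numbering interval does.
static bool encloses(const Node *A, const Node *B) {
  return A->In <= B->In && B->Out <= A->Out;
}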
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF) + return true; + + bool Result = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + DebugLoc IDL = I->getDebugLoc(); + if (IDL.isUnknown()) + continue; + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; + } + return Result; +} + +/// dump - Print data structures. +void LexicalScope::dump() const { +#ifndef NDEBUG + raw_ostream &err = dbgs(); + err.indent(IndentLevel); + err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; + const MDNode *N = Desc; + N->dump(); + if (AbstractScope) + err << "Abstract Scope\n"; + + IndentLevel += 2; + if (!Children.empty()) + err << "Children ...\n"; + for (unsigned i = 0, e = Children.size(); i != e; ++i) + if (Children[i] != this) + Children[i]->dump(); + + IndentLevel -= 2; +#endif +} + diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 5d38c83..3dfe4c0 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -25,7 +25,10 @@ #include "llvm/Constants.h" #include "llvm/Metadata.h" #include "llvm/Value.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -44,6 +47,7 @@ static cl::opt<bool> EnableLDV("live-debug-variables", cl::init(true), cl::desc("Enable the live debug variables pass"), cl::Hidden); +STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); char LiveDebugVariables::ID = 0; INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", @@ -67,6 +71,29 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { /// LocMap - Map of where a user value is live, and its location. typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; +namespace { +/// UserValueScopes - Keeps track of lexical scopes associated with an +/// user value's source location. +class UserValueScopes { + DebugLoc DL; + LexicalScopes &LS; + SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; + +public: + UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {} + + /// dominates - Return true if current scope dominates at least one machine + /// instruction in a given machine basic block. + bool dominates(MachineBasicBlock *MBB) { + if (LBlocks.empty()) + LS.getMachineBasicBlocks(DL, LBlocks); + if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB)) + return true; + return false; + } +}; +} // end anonymous namespace + /// UserValue - A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register @@ -179,6 +206,9 @@ public: LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO)); + else + // A later DBG_VALUE at the same SlotIndex overrides the old location. 
+ I.setValue(getLocationNo(LocMO)); } /// extendDef - Extend the current definition as far as possible down the @@ -195,7 +225,8 @@ public: void extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT); + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -213,7 +244,8 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. void computeIntervals(MachineRegisterInfo &MRI, - LiveIntervals &LIS, MachineDominatorTree &MDT); + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS); /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, @@ -236,6 +268,9 @@ public: /// Only first one needs DebugLoc to identify variable's lexical scope /// in source file. DebugLoc findDebugLoc(); + + /// getDebugLoc - Return DebugLoc of this UserValue. + DebugLoc getDebugLoc() { return dl;} void print(raw_ostream&, const TargetMachine*); }; } // namespace @@ -247,6 +282,7 @@ class LDVImpl { LocMap::Allocator allocator; MachineFunction *MF; LiveIntervals *LIS; + LexicalScopes LS; MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; @@ -312,8 +348,10 @@ public: } // namespace void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { - if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2))) - OS << "!\"" << MDS->getString() << "\"\t"; + DIVariable DV(variable); + OS << "!\""; + DV.printExtendedName(OS); + OS << "\"\t"; if (offset) OS << '+' << offset; for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { @@ -447,10 +485,10 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT) { + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); - do { SlotIndex Start = Todo.pop_back_val(); MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); @@ -497,8 +535,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, continue; const std::vector<MachineDomTreeNode*> &Children = MDT.getNode(MBB)->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock())); + for (unsigned i = 0, e = Children.size(); i != e; ++i) { + MachineBasicBlock *MBB = Children[i]->getBlock(); + if (UVS.dominates(MBB)) + Todo.push_back(LIS.getMBBStartIdx(MBB)); + } } while (!Todo.empty()); } @@ -578,7 +619,8 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, - MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + UserValueScopes &UVS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). 
@@ -597,10 +639,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveInterval *LI = &LIS.getInterval(Loc.getReg()); const VNInfo *VNI = LI->getVNInfoAt(Idx); SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT); + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); } else - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT); + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -613,7 +655,8 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT); + UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); + userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS); userValues[i]->mapVirtRegs(this); } } @@ -624,6 +667,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { MDT = &pass.getAnalysis<MachineDominatorTree>(); TRI = mf.getTarget().getRegisterInfo(); clear(); + LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << ((Value*)mf.getFunction())->getName() << " **********\n"); @@ -631,6 +675,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool Changed = collectDebugValues(mf); computeIntervals(); DEBUG(print(dbgs())); + LS.releaseMemory(); return Changed; } @@ -891,6 +936,7 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, const TargetInstrInfo &TII) { MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); MachineOperand &Loc = locations[LocNo]; + ++NumInsertedDebugValues; // Frame index locations may require a target callback. if (Loc.isFI()) { @@ -921,7 +967,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); insertDebugValue(MBB, Start, LocNo, LIS, TII); - // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. while(Stop > MBBEnd) { diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index cfade24..b69945a 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -148,7 +148,6 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { /// remaining unused values. void LiveInterval::RenumberValues(LiveIntervals &lis) { SmallPtrSet<VNInfo*, 8> Seen; - bool seenPHIDef = false; valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; @@ -157,26 +156,6 @@ void LiveInterval::RenumberValues(LiveIntervals &lis) { assert(!VNI->isUnused() && "Unused valno used by live range"); VNI->id = (unsigned)valnos.size(); valnos.push_back(VNI); - VNI->setHasPHIKill(false); - if (VNI->isPHIDef()) - seenPHIDef = true; - } - - // Recompute phi kill flags. 
- if (!seenPHIDef) - return; - for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (!VNI->isPHIDef()) - continue; - const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def); - assert(PHIBB && "No basic block for phi-def"); - for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(), - PE = PHIBB->pred_end(); PI != PE; ++PI) { - VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot()); - if (KVNI) - KVNI->setHasPHIKill(true); - } } } @@ -294,20 +273,20 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// extendInBlock - If this interval is live before UseIdx in the basic -/// block that starts at StartIdx, extend it to be live at UseIdx and return -/// the value. If there is no live range before UseIdx, return NULL. -VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) { +/// extendInBlock - If this interval is live before Kill in the basic +/// block that starts at StartIdx, extend it to be live up to Kill and return +/// the value. If there is no live range before Kill, return NULL. +VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) return 0; - iterator I = std::upper_bound(begin(), end(), UseIdx); + iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); if (I == begin()) return 0; --I; if (I->end <= StartIdx) return 0; - if (I->end <= UseIdx) - extendIntervalEndTo(I, UseIdx.getNextSlot()); + if (I->end < Kill) + extendIntervalEndTo(I, Kill); return I->valno; } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 9257191..b1e202a 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -304,8 +304,19 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Make sure the first definition is not a partial redefinition. Add an // <imp-def> of the full register. - if (MO.getSubReg()) + // FIXME: LiveIntervals shouldn't modify the code like this. Whoever + // created the machine instruction should annotate it with <undef> flags + // as needed. Then we can simply assert here. The REG_SEQUENCE lowering + // is the main suspect. + if (MO.getSubReg()) { mi->addRegisterDefined(interval.reg); + // Mark all defs of interval.reg on this instruction as reading <undef>. + for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO2 = mi->getOperand(i); + if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg()) + MO2.setIsUndef(); + } + } MachineInstr *CopyMI = NULL; if (mi->isCopyLike()) { @@ -747,6 +758,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Find all the values used, including PHI kills. SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; + // Blocks that have already been added to WorkList as live-out. + SmallPtrSet<MachineBasicBlock*, 16> LiveOut; + // Visit all instructions reading li->reg. for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg); MachineInstr *UseMI = I.skipInstruction();) { @@ -780,8 +794,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - // We may eliminate PHI values, so recompute PHIKill flags. 
- VNI->setHasPHIKill(false); NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); // A use tied to an early-clobber def ends at the load slot and isn't caught @@ -804,7 +816,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -813,13 +825,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // The PHI is live, make sure the predecessors are live-out. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { + if (!LiveOut.insert(*PI)) + continue; SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - VNInfo *PVNI = li->getVNInfoAt(Stop); // A predecessor is not required to have a live-out value for a PHI. - if (PVNI) { - PVNI->setHasPHIKill(true); + if (VNInfo *PVNI = li->getVNInfoAt(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); - } } continue; } @@ -831,6 +842,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { + if (!LiveOut.insert(*PI)) + continue; SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 70003e7..110fe1e 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -91,25 +91,6 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << '\n'; } -void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS, - const TargetRegisterInfo *TRI) const { - OS << '[' << start() << ';' << stop() << "):" - << PrintReg(interference()->reg, TRI); -} - -void LiveIntervalUnion::Query::print(raw_ostream &OS, - const TargetRegisterInfo *TRI) { - OS << "Interferences with "; - LiveUnion->print(OS, TRI); - InterferenceResult IR = firstInterference(); - while (isInterference(IR)) { - OS << " "; - IR.print(OS, TRI); - OS << '\n'; - nextInterference(IR); - } -} - #ifndef NDEBUG // Verify the live intervals in this union and add them to the visited set. void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { @@ -118,114 +99,6 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { } #endif //!NDEBUG -// Private interface accessed by Query. -// -// Find a pair of segments that intersect, one in the live virtual register -// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query) -// is responsible for advancing the LiveIntervalUnion segments to find a -// "notable" intersection, which requires query-specific logic. -// -// This design assumes only a fast mechanism for intersecting a single live -// virtual register segment with a set of LiveIntervalUnion segments. This may -// be ok since most virtual registers have very few segments. If we had a data -// structure that optimizd MxN intersection of segments, then we would bypass -// the loop that advances within the LiveInterval. 
-// -// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first -// segment whose start point is greater than LiveInterval's end point. -// -// Assumes that segments are sorted by start position in both -// LiveInterval and LiveSegments. -void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const { - // Search until reaching the end of the LiveUnion segments. - LiveInterval::iterator VirtRegEnd = VirtReg->end(); - if (IR.VirtRegI == VirtRegEnd) - return; - while (IR.LiveUnionI.valid()) { - // Slowly advance the live virtual reg iterator until we surpass the next - // segment in LiveUnion. - // - // Note: If this is ever used for coalescing of fixed registers and we have - // a live vreg with thousands of segments, then change this code to use - // upperBound instead. - IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); - if (IR.VirtRegI == VirtRegEnd) - break; // Retain current (nonoverlapping) LiveUnionI - - // VirtRegI may have advanced far beyond LiveUnionI, catch up. - IR.LiveUnionI.advanceTo(IR.VirtRegI->start); - - // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end - if (!IR.LiveUnionI.valid()) - break; - if (IR.LiveUnionI.start() < IR.VirtRegI->end) { - assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && - "upperBound postcondition"); - break; - } - } - if (!IR.LiveUnionI.valid()) - IR.VirtRegI = VirtRegEnd; -} - -// Find the first intersection, and cache interference info -// (retain segment iterators into both VirtReg and LiveUnion). -const LiveIntervalUnion::InterferenceResult & -LiveIntervalUnion::Query::firstInterference() { - if (CheckedFirstInterference) - return FirstInterference; - CheckedFirstInterference = true; - InterferenceResult &IR = FirstInterference; - IR.LiveUnionI.setMap(LiveUnion->getMap()); - - // Quickly skip interference check for empty sets. - if (VirtReg->empty() || LiveUnion->empty()) { - IR.VirtRegI = VirtReg->end(); - } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) { - // VirtReg starts first, perform double binary search. - IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); - if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI.find(IR.VirtRegI->start); - } else { - // LiveUnion starts first, perform double binary search. - IR.LiveUnionI.find(VirtReg->beginIndex()); - if (IR.LiveUnionI.valid()) - IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); - else - IR.VirtRegI = VirtReg->end(); - } - findIntersection(FirstInterference); - assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid()) - && "Uninitialized iterator"); - return FirstInterference; -} - -// Treat the result as an iterator and advance to the next interfering pair -// of segments. This is a plain iterator with no filter. -bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const { - assert(isInterference(IR) && "iteration past end of interferences"); - - // Advance either the VirtReg or LiveUnion segment to ensure that we visit all - // unique overlapping pairs. - if (IR.VirtRegI->end < IR.LiveUnionI.stop()) { - if (++IR.VirtRegI == VirtReg->end()) - return false; - } - else { - if (!(++IR.LiveUnionI).valid()) { - IR.VirtRegI = VirtReg->end(); - return false; - } - } - // Short-circuit findIntersection() if possible. - if (overlap(*IR.VirtRegI, IR.LiveUnionI)) - return true; - - // Find the next intersection. - findIntersection(IR); - return isInterference(IR); -} - // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. 
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { @@ -234,64 +107,75 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { return I != InterferingVRegs.end(); } -// Count the number of virtual registers in this union that interfere with this +// Collect virtual registers in this union that interfere with this // query's live virtual register. // -// The number of times that we either advance IR.VirtRegI or call -// LiveUnion.upperBound() will be no more than the number of holes in -// VirtReg. So each invocation of collectInterferingVRegs() takes -// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()). +// The query state is one of: +// +// 1. CheckedFirstInterference == false: Iterators are uninitialized. +// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused. +// 3. Iterators left at the last seen intersection. // -// For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: collectInterferingVRegs(unsigned MaxInterferingRegs) { - InterferenceResult IR = firstInterference(); - LiveInterval::iterator VirtRegEnd = VirtReg->end(); - LiveInterval *RecentInterferingVReg = NULL; - if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) { - // Advance the union's iterator to reach an unseen interfering vreg. - do { - if (IR.LiveUnionI.value() == RecentInterferingVReg) - continue; + // Fast path return if we already have the desired information. + if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + + // Set up iterators on the first call. + if (!CheckedFirstInterference) { + CheckedFirstInterference = true; + + // Quickly skip interference check for empty sets. + if (VirtReg->empty() || LiveUnion->empty()) { + SeenAllInterferences = true; + return 0; + } - if (!isSeenInterference(IR.LiveUnionI.value())) - break; + // In most cases, the union will start before VirtReg. + VirtRegI = VirtReg->begin(); + LiveUnionI.setMap(LiveUnion->getMap()); + LiveUnionI.find(VirtRegI->start); + } - // Cache the most recent interfering vreg to bypass isSeenInterference. - RecentInterferingVReg = IR.LiveUnionI.value(); + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + LiveInterval *RecentReg = 0; + while (LiveUnionI.valid()) { + assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg"); + + // Check for overlapping interference. + while (VirtRegI->start < LiveUnionI.stop() && + VirtRegI->end > LiveUnionI.start()) { + // This is an overlap, record the interfering register. + LiveInterval *VReg = LiveUnionI.value(); + if (VReg != RecentReg && !isSeenInterference(VReg)) { + RecentReg = VReg; + InterferingVRegs.push_back(VReg); + if (InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + } + // This LiveUnion segment is no longer interesting. + if (!(++LiveUnionI).valid()) { + SeenAllInterferences = true; + return InterferingVRegs.size(); + } + } - } while ((++IR.LiveUnionI).valid()); - if (!IR.LiveUnionI.valid()) - break; + // The iterators are now not overlapping, LiveUnionI has been advanced + // beyond VirtRegI. + assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap"); - // Advance the VirtReg iterator until surpassing the next segment in - // LiveUnion. - IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); - if (IR.VirtRegI == VirtRegEnd) + // Advance the iterator that ends first. 
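// Illustrative sketch (editor's addition, not part of the patch): the
// rewritten Query::collectInterferingVRegs is essentially a two-iterator
// sweep over two sorted segment lists: report an overlap whenever
// a.start < b.end and b.start < a.end, then advance whichever segment ends
// first. Stand-alone version over plain vectors; the Seg type and function
// name are invented for the example.
#include <utility>
#include <vector>

typedef std::pair<int, int> Seg;         // half-open [start, end) segment

static void collectOverlaps(const std::vector<Seg> &A,
                            const std::vector<Seg> &B,
                            std::vector<std::pair<Seg, Seg> > &Out) {
  unsigned i = 0, j = 0;
  while (i != A.size() && j != B.size()) {
    if (A[i].first < B[j].second && B[j].first < A[i].second)
      Out.push_back(std::make_pair(A[i], B[j]));      // overlapping pair
    // Advance the list whose current segment ends first; the other segment
    // may still overlap later entries.
    if (A[i].second < B[j].second)
      ++i;
    else
      ++j;
  }
}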
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start()); + if (VirtRegI == VirtRegEnd) break; - // Check for intersection with the union's segment. - if (overlap(*IR.VirtRegI, IR.LiveUnionI)) { - - if (!IR.LiveUnionI.value()->isSpillable()) - SeenUnspillableVReg = true; - - if (InterferingVRegs.size() == MaxInterferingRegs) - // Leave SeenAllInterferences set to false to indicate that at least one - // interference exists beyond those we collected. - return MaxInterferingRegs; - - InterferingVRegs.push_back(IR.LiveUnionI.value()); - - // Cache the most recent interfering vreg to bypass isSeenInterference. - RecentInterferingVReg = IR.LiveUnionI.value(); - ++IR.LiveUnionI; - + // Detect overlap, handle above. + if (VirtRegI->start < LiveUnionI.stop()) continue; - } - // VirtRegI may have advanced far beyond LiveUnionI, - // do a fast intersection test to "catch up" - IR.LiveUnionI.advanceTo(IR.VirtRegI->start); + + // Still not overlapping. Catch up LiveUnionI. + LiveUnionI.advanceTo(VirtRegI->start); } SeenAllInterferences = true; return InterferingVRegs.size(); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h index 5e78d5e..5d64d28 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h @@ -59,7 +59,6 @@ public: // LiveIntervalUnions share an external allocator. typedef LiveSegments::Allocator Allocator; - class InterferenceResult; class Query; private: @@ -106,62 +105,13 @@ public: void verify(LiveVirtRegBitSet& VisitedVRegs); #endif - /// Cache a single interference test result in the form of two intersecting - /// segments. This allows efficiently iterating over the interferences. The - /// iteration logic is handled by LiveIntervalUnion::Query which may - /// filter interferences depending on the type of query. - class InterferenceResult { - friend class Query; - - LiveInterval::iterator VirtRegI; // current position in VirtReg - SegmentIter LiveUnionI; // current position in LiveUnion - - // Internal ctor. - InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI) - : VirtRegI(VRegI), LiveUnionI(UnionI) {} - - public: - // Public default ctor. - InterferenceResult(): VirtRegI(), LiveUnionI() {} - - /// start - Return the start of the current overlap. - SlotIndex start() const { - return std::max(VirtRegI->start, LiveUnionI.start()); - } - - /// stop - Return the end of the current overlap. - SlotIndex stop() const { - return std::min(VirtRegI->end, LiveUnionI.stop()); - } - - /// interference - Return the register that is interfering here. - LiveInterval *interference() const { return LiveUnionI.value(); } - - // Note: this interface provides raw access to the iterators because the - // result has no way to tell if it's valid to dereference them. - - // Access the VirtReg segment. - LiveInterval::iterator virtRegPos() const { return VirtRegI; } - - // Access the LiveUnion segment. - const SegmentIter &liveUnionPos() const { return LiveUnionI; } - - bool operator==(const InterferenceResult &IR) const { - return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI; - } - bool operator!=(const InterferenceResult &IR) const { - return !operator==(IR); - } - - void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; - }; - /// Query interferences between a single live virtual register and a live /// interval union. 
class Query { LiveIntervalUnion *LiveUnion; LiveInterval *VirtReg; - InterferenceResult FirstInterference; + LiveInterval::iterator VirtRegI; // current position in VirtReg + SegmentIter LiveUnionI; // current position in LiveUnion SmallVector<LiveInterval*,4> InterferingVRegs; bool CheckedFirstInterference; bool SeenAllInterferences; @@ -206,26 +156,8 @@ public: return *VirtReg; } - bool isInterference(const InterferenceResult &IR) const { - if (IR.VirtRegI != VirtReg->end()) { - assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && - "invalid segment iterators"); - return true; - } - return false; - } - // Does this live virtual register interfere with the union? - bool checkInterference() { return isInterference(firstInterference()); } - - // Get the first pair of interfering segments, or a noninterfering result. - // This initializes the firstInterference_ cache. - const InterferenceResult &firstInterference(); - - // Treat the result as an iterator and advance to the next interfering pair - // of segments. Visiting each unique interfering pairs means that the same - // VirtReg or LiveUnion segment may be visited multiple times. - bool nextInterference(InterferenceResult &IR) const; + bool checkInterference() { return collectInterferingVRegs(1); } // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. @@ -249,13 +181,9 @@ public: /// Loop. bool checkLoopInterference(MachineLoopRange*); - void print(raw_ostream &OS, const TargetRegisterInfo *TRI); private: Query(const Query&); // DO NOT IMPLEMENT void operator=(const Query&); // DO NOT IMPLEMENT - - // Private interface for queries - void findIntersection(InterferenceResult &IR) const; }; }; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp new file mode 100644 index 0000000..a7d5af5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -0,0 +1,270 @@ +//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the LiveRangeCalc class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveRangeCalc.h" +#include "llvm/CodeGen/MachineDominators.h" + +using namespace llvm; + +void LiveRangeCalc::reset(const MachineFunction *MF) { + unsigned N = MF->getNumBlockIDs(); + Seen.clear(); + Seen.resize(N); + LiveOut.resize(N); + LiveIn.clear(); +} + + +// Transfer information from the LiveIn vector to the live ranges. +void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) { + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), + E = LiveIn.end(); I != E; ++I) { + if (!I->DomNode) + continue; + MachineBasicBlock *MBB = I->DomNode->getBlock(); + + VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value; + assert(VNI && "No live-in value found"); + + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(MBB); + + if (I->Kill.isValid()) + I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + I->LI->addRange(LiveRange(Start, End, VNI)); + // The value is live-through, update LiveOut as well. Defer the Domtree + // lookup until it is needed. 
+ assert(Seen.test(MBB->getNumber())); + LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + } + } + LiveIn.clear(); +} + + +void LiveRangeCalc::extend(LiveInterval *LI, + SlotIndex Kill, + SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc) { + assert(LI && "Missing live range"); + assert(Kill.isValid() && "Invalid SlotIndex"); + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); + assert(Kill && "No MBB at Kill"); + + // Is there a def in the same MBB we can extend? + if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + return; + + // Find the single reaching def, or determine if Kill is jointly dominated by + // multiple values, and we may need to create even more phi-defs to preserve + // VNInfo SSA form. Perform a search for all predecessor blocks where we + // know the dominating VNInfo. + VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree); + + // When there were multiple different values, we may need new PHIs. + if (!VNI) + updateSSA(Indexes, DomTree, Alloc); + + updateLiveIns(VNI, Indexes); +} + + +// This function is called by a client after using the low-level API to add +// live-out and live-in blocks. The unique value optimization is not +// available, SplitEditor::transferValues handles that case directly anyway. +void LiveRangeCalc::calculateValues(SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc) { + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + updateSSA(Indexes, DomTree, Alloc); + updateLiveIns(0, Indexes); +} + + +VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + SlotIndexes *Indexes, + MachineDominatorTree *DomTree) { + // Blocks where LI should be live-in. + SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *TheVNI = 0; + + // Using Seen as a visited set, perform a BFS for all reaching defs. + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; + assert(!MBB->pred_empty() && "Value live-in to entry block?"); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + + // Is this a known live-out block? + if (Seen.test(Pred->getNumber())) { + if (VNInfo *VNI = LiveOut[Pred].first) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + } + continue; + } + + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(Pred); + + // First time we see Pred. Try to determine the live-out value, but set + // it as null if Pred is live-through with an unknown value. + VNInfo *VNI = LI->extendInBlock(Start, End); + setLiveOutValue(Pred, VNI); + if (VNI) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + continue; + } + + // No, we need a live-in value for Pred as well + if (Pred != KillMBB) + WorkList.push_back(Pred); + else + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); + } + } + + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveIn.clear(); + LiveIn.reserve(WorkList.size()); + while(!WorkList.empty()) + addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val())); + + // The kill block may not be live-through. 
+ assert(LiveIn.back().DomNode->getBlock() == KillMBB); + LiveIn.back().Kill = Kill; + + return UniqueVNI ? TheVNI : 0; +} + + +// This is essentially the same iterative algorithm that SSAUpdater uses, +// except we already have a dominator tree, so we don't have to recompute it. +void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc) { + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + + // Interate until convergence. + unsigned Changes; + do { + Changes = 0; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), + E = LiveIn.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. + if (!Node) + continue; + MachineBasicBlock *MBB = Node->getBlock(); + MachineDomTreeNode *IDom = Node->getIDom(); + LiveOutPair IDomValue; + + // We need a live-in value to a block with no immediate dominator? + // This is probably an unreachable block that has survived somehow. + bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber()); + + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. + if (!needPHI) { + IDomValue = LiveOut[IDom->getBlock()]; + + // Cache the DomTree node that defined the value. + if (IDomValue.first && !IDomValue.second) + LiveOut[IDom->getBlock()].second = IDomValue.second = + DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); + + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + LiveOutPair &Value = LiveOut[*PI]; + if (!Value.first || Value.first == IDomValue.first) + continue; + + // Cache the DomTree node that defined the value. + if (!Value.second) + Value.second = + DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def)); + + // This predecessor is carrying something other than IDomValue. + // It could be because IDomValue hasn't propagated yet, or it could be + // because MBB is in the dominance frontier of that value. + if (DomTree->dominates(IDom, Value.second)) { + needPHI = true; + break; + } + } + } + + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOut indicates a foreign or missing value. + LiveOutPair &LOP = LiveOut[MBB]; + + // Create a phi-def if required. + if (needPHI) { + ++Changes; + assert(Alloc && "Need VNInfo allocator to create PHI-defs"); + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(MBB); + VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc); + VNI->setIsPHIDef(true); + I->Value = VNI; + // This block is done, we know the final value. + I->DomNode = 0; + + // Add liveness since updateLiveIns now skips this node. + if (I->Kill.isValid()) + I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + I->LI->addRange(LiveRange(Start, End, VNI)); + LOP = LiveOutPair(VNI, Node); + } + } else if (IDomValue.first) { + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + + // If the IDomValue is killed in the block, don't propagate through. + if (I->Kill.isValid()) + continue; + + // Propagate IDomValue if it isn't killed: + // MBB is live-out and doesn't define its own value. 
+ if (LOP.first == IDomValue.first) + continue; + ++Changes; + LOP = IDomValue; + } + } + } while (Changes); +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h new file mode 100644 index 0000000..b8c8585 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -0,0 +1,226 @@ +//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeCalc class can be used to compute live ranges from scratch. It +// caches information about values in the CFG to speed up repeated operations +// on the same live range. The cache can be shared by non-overlapping live +// ranges. SplitKit uses that when computing the live range of split products. +// +// A low-level interface is available to clients that know where a variable is +// live, but don't know which value it has as every point. LiveRangeCalc will +// propagate values down the dominator tree, and even insert PHI-defs where +// needed. SplitKit uses this faster interface when possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVERANGECALC_H +#define LLVM_CODEGEN_LIVERANGECALC_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/CodeGen/LiveInterval.h" + +namespace llvm { + +/// Forward declarations for MachineDominators.h: +class MachineDominatorTree; +template <class NodeT> class DomTreeNodeBase; +typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; + +class LiveRangeCalc { + /// Seen - Bit vector of active entries in LiveOut, also used as a visited + /// set by findReachingDefs. One entry per basic block, indexed by block + /// number. This is kept as a separate bit vector because it can be cleared + /// quickly when switching live ranges. + BitVector Seen; + + /// LiveOutPair - A value and the block that defined it. The domtree node is + /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. + typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + + /// LiveOutMap - Map basic blocks to the value leaving the block. + typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; + + /// LiveOut - Map each basic block where a live range is live out to the + /// live-out value and its defining block. + /// + /// For every basic block, MBB, one of these conditions shall be true: + /// + /// 1. !Seen.count(MBB->getNumber()) + /// Blocks without a Seen bit are ignored. + /// 2. LiveOut[MBB].second.getNode() == MBB + /// The live-out value is defined in MBB. + /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB] + /// The live-out value passses through MBB. All predecessors must carry + /// the same value. + /// + /// The domtree node may be null, it can be computed. + /// + /// The map can be shared by multiple live ranges as long as no two are + /// live-out of the same block. + LiveOutMap LiveOut; + + /// LiveInBlock - Information about a basic block where a live range is known + /// to be live-in, but the value has not yet been determined. + struct LiveInBlock { + // LI - The live range that is live-in to this block. The algorithms can + // handle multiple non-overlapping live ranges simultaneously. 
+ LiveInterval *LI; + + // DomNode - Dominator tree node for the block. + // Cleared when the final value has been determined and LI has been updated. + MachineDomTreeNode *DomNode; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. When the final value has been + // determined, the range from the block start to Kill will be added to LI. + SlotIndex Kill; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value; + + LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill) + : LI(li), DomNode(node), Kill(kill), Value(0) {} + }; + + /// LiveIn - Work list of blocks where the live-in value has yet to be + /// determined. This list is typically computed by findReachingDefs() and + /// used as a work list by updateSSA(). The low-level interface may also be + /// used to add entries directly. + SmallVector<LiveInBlock, 16> LiveIn; + + /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at + /// Kill, search for values that can reach KillMBB. All blocks that need LI + /// to be live-in are added to LiveIn. If a unique reaching def is found, + /// its value is returned, if Kill is jointly dominated by multiple values, + /// NULL is returned. + VNInfo *findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + SlotIndexes *Indexes, + MachineDominatorTree *DomTree); + + /// updateSSA - Compute the values that will be live in to all requested + /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. + /// + /// Every live-in block must be jointly dominated by the added live-out + /// blocks. No values are read from the live ranges. + void updateSSA(SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc); + + /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI + /// as a wildcard value for LiveIn entries without a value. + void updateLiveIns(VNInfo *VNI, SlotIndexes*); + +public: + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Calculate live ranges from scratch. + // + + /// reset - Prepare caches for a new set of non-overlapping live ranges. The + /// caches must be reset before attempting calculations with a live range + /// that may overlap a previously computed live range, and before the first + /// live range in a function. If live ranges are not known to be + /// non-overlapping, call reset before each. + void reset(const MachineFunction *MF); + + /// calculate - Calculate the live range of a virtual register from its defs + /// and uses. LI must be empty with no values. + void calculate(LiveInterval *LI, + MachineRegisterInfo *MRI, + SlotIndexes *Indexes, + VNInfo::Allocator *Alloc); + + //===--------------------------------------------------------------------===// + // Mid-level interface. + //===--------------------------------------------------------------------===// + // + // Modify existing live ranges. + // + + /// extend - Extend the live range of LI to reach Kill. + /// + /// The existing values in LI must be live so they jointly dominate Kill. If + /// Kill is not dominated by a single existing value, PHI-defs are inserted + /// as required to preserve SSA form. If Kill is known to be dominated by a + /// single existing value, Alloc may be null. 
+ void extend(LiveInterval *LI, + SlotIndex Kill, + SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc); + + /// extendToUses - Extend the live range of LI to reach all uses. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveInterval *LI, + MachineRegisterInfo *MRI, + SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // These functions can be used to compute live ranges where the live-in and + // live-out blocks are already known, but the SSA value in each block is + // unknown. + // + // After calling reset(), add known live-out values and known live-in blocks. + // Then call calculateValues() to compute the actual value that is + // live-in to each block, and add liveness to the live ranges. + // + + /// setLiveOutValue - Indicate that VNI is live out from MBB. The + /// calculateValues() function will not add liveness for MBB, the caller + /// should take care of that. + /// + /// VNI may be null only if MBB is a live-through block also passed to + /// addLiveInBlock(). + void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { + Seen.set(MBB->getNumber()); + LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + } + + /// addLiveInBlock - Add a block with an unknown live-in value. This + /// function can only be called once per basic block. Once the live-in value + /// has been determined, calculateValues() will add liveness to LI. + /// + /// @param LI The live range that is live-in to the block. + /// @param DomNode The domtree node for the block. + /// @param Kill Index in block where LI is killed. If the value is + /// live-through, set Kill = SLotIndex() and also call + /// setLiveOutValue(MBB, 0). + void addLiveInBlock(LiveInterval *LI, + MachineDomTreeNode *DomNode, + SlotIndex Kill = SlotIndex()) { + LiveIn.push_back(LiveInBlock(LI, DomNode, Kill)); + } + + /// calculateValues - Calculate the value that will be live-in to each block + /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA + /// form. Add liveness to all live-in blocks up to the Kill point, or the + /// whole block for live-through blocks. + /// + /// Every predecessor of a live-in block must have been given a value with + /// setLiveOutValue, the value may be null for live-trough blocks. 
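The low-level interface documented above amounts to a three-step protocol: reset the caches, declare the known live-out values and live-in blocks, then let calculateValues() resolve the live-in values and add liveness. A schematic sketch of a caller, using only the declarations shown above; the straight-line CFG DefMBB -> MidMBB -> UseMBB, the surrounding objects, and the choice of kill index are assumptions made for the example and not taken from this patch:

// Schematic only: assumes LI is live out of DefMBB with value VNI, passes
// through MidMBB, and is live in to UseMBB up to some kill point.
void sketchLowLevelUse(LiveRangeCalc &Calc, LiveInterval *LI, VNInfo *VNI,
                       MachineBasicBlock *DefMBB, MachineBasicBlock *MidMBB,
                       MachineBasicBlock *UseMBB,
                       MachineDominatorTree *DomTree, SlotIndexes *Indexes,
                       VNInfo::Allocator *Alloc, const MachineFunction *MF) {
  Calc.reset(MF);                           // fresh caches for this live range

  // 1. Known live-out value. The caller adds liveness for DefMBB itself.
  Calc.setLiveOutValue(DefMBB, VNI);

  // 2. Live-through block: unknown live-in value, no kill, and a null
  //    live-out value so calculateValues() treats it as pass-through.
  Calc.addLiveInBlock(LI, DomTree->getNode(MidMBB));
  Calc.setLiveOutValue(MidMBB, 0);

  // 3. Live-in block with a kill point (block start used as a placeholder).
  SlotIndex KillIdx = Indexes->getMBBStartIdx(UseMBB);
  Calc.addLiveInBlock(LI, DomTree->getNode(UseMBB), KillIdx);

  // Resolve the live-in values (inserting PHI-defs if needed) and fill in LI.
  Calc.calculateValues(Indexes, DomTree, Alloc);
}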
+ void calculateValues(SlotIndexes *Indexes, + MachineDominatorTree *DomTree, + VNInfo::Allocator *Alloc); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index b385fb3..b23f851 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -319,9 +319,12 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); + MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; - VRAI.CalculateRegClass(LI.reg); + if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) + DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + << MRI.getRegClass(LI.reg)->getName() << '\n'); VRAI.CalculateWeightAndHint(LI); } } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h index db6740c..9b0a671 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h @@ -115,7 +115,7 @@ public: LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } ArrayRef<LiveInterval*> regs() const { - return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_); + return makeArrayRef(newRegs_).slice(firstNew_); } /// FIXME: Temporary accessors until we can get rid of diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index c75196a..939e795 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -44,7 +44,8 @@ void LiveStacks::releaseMemory() { S2RCMap.clear(); } -bool LiveStacks::runOnMachineFunction(MachineFunction &) { +bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); // FIXME: No analysis is being done right now. We are relying on the // register allocators to provide the information. return false; @@ -61,7 +62,7 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { } else { // Use the largest common subclass register class. const TargetRegisterClass *OldRC = S2RCMap[Slot]; - S2RCMap[Slot] = getCommonSubClass(OldRC, RC); + S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC); } return I->second; } diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 20bad60..2ca90f9 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -662,7 +662,7 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { bool removed = getVarInfo(Reg).removeKill(MI); assert(removed && "kill not in register's VarInfo?"); - removed = true; + (void)removed; } } } diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 8f0fb46..4c5fe4c 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -571,6 +571,11 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (i->getOperand(ni+1).getMBB() == this) i->getOperand(ni+1).setMBB(NMBB); + // Inherit live-ins from the successor + for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), + E = Succ->livein_end(); I != E; ++I) + NMBB->addLiveIn(*I); + // Update LiveVariables. 
if (LV) { // Restore kills of virtual registers that were killed by the terminators. diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 893a320..b92cda9 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====// +//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====// // // The LLVM Compiler Infrastructure // @@ -13,47 +13,49 @@ #include "llvm/InitializePasses.h" #include "llvm/Analysis/BlockFrequencyImpl.h" -#include "llvm/CodeGen/MachineBlockFrequency.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" using namespace llvm; -INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq", +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq", +INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) -char MachineBlockFrequency::ID = 0; +char MachineBlockFrequencyInfo::ID = 0; -MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) { - initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry()); +MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) { + initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction, MachineBranchProbabilityInfo>(); } -MachineBlockFrequency::~MachineBlockFrequency() { +MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() { delete MBFI; } -void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const { +void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } -bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) { +bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); MBFI->doFunction(&F, &MBPI); return false; } -/// getblockFreq - Return block frequency. Never return 0, value must be -/// positive. Please note that initial frequency is equal to 1024. It means that -/// we should not rely on the value itself, but only on the comparison to the -/// other block frequencies. We do this to avoid using of floating points. +/// getblockFreq - Return block frequency. Return 0 if we don't have the +/// information. Please note that initial frequency is equal to 1024. It means +/// that we should not rely on the value itself, but only on the comparison to +/// the other block frequencies. We do this to avoid using of floating points. 
/// -uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) { +BlockFrequency MachineBlockFrequencyInfo:: +getBlockFreq(MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 3a60a37..7eda8c1 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -430,13 +430,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { unsigned NewReg = CSMI->getOperand(i).getReg(); if (OldReg == NewReg) continue; + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); + if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { DoCSE = false; break; } + + // Don't perform CSE if the result of the old instruction cannot exist + // within the register class of the new instruction. + const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); + if (!MRI->constrainRegClass(NewReg, OldRC)) { + DoCSE = false; + break; + } + CSEPairs.push_back(std::make_pair(OldReg, NewReg)); --NumDefs; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index cd25156..20066a0 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -619,7 +619,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } // MachineConstantPool implementation //===----------------------------------------------------------------------===// -const Type *MachineConstantPoolEntry::getType() const { +Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 143a29b..a240667 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -51,7 +51,7 @@ using namespace llvm; /// explicitly nulled out. void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { assert(isReg() && "Can only add reg operand to use lists"); - + // If the reginfo pointer is null, just explicitly null out or next/prev // pointers, to ensure they are not garbage. if (RegInfo == 0) { @@ -59,23 +59,23 @@ void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { Contents.Reg.Next = 0; return; } - + // Otherwise, add this operand to the head of the registers use/def list. MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - + // For SSA values, we prefer to keep the definition at the start of the list. // we do this by skipping over the definition if it is at the head of the // list. if (*Head && (*Head)->isDef()) Head = &(*Head)->Contents.Reg.Next; - + Contents.Reg.Next = *Head; if (Contents.Reg.Next) { assert(getReg() == Contents.Reg.Next->getReg() && "Different regs on the same list!"); Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; } - + Contents.Reg.Prev = Head; *Head = this; } @@ -86,7 +86,7 @@ void MachineOperand::RemoveRegOperandFromRegInfo() { assert(isOnRegUseList() && "Reg operand is not on a use list"); // Unlink this from the doubly linked list of operands. 
MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; + *Contents.Reg.Prev = NextOp; if (NextOp) { assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); NextOp->Contents.Reg.Prev = Contents.Reg.Prev; @@ -97,7 +97,7 @@ void MachineOperand::RemoveRegOperandFromRegInfo() { void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. - + // Otherwise, we have to change the register. If this operand is embedded // into a machine function, we need to update the old and new register's // use/def lists. @@ -109,7 +109,7 @@ void MachineOperand::setReg(unsigned Reg) { AddRegOperandToRegInfo(&MF->getRegInfo()); return; } - + // Otherwise, just change the register, no problem. :) SmallContents.RegNo = Reg; } @@ -144,7 +144,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { if (isReg() && getParent() && getParent()->getParent() && getParent()->getParent()->getParent()) RemoveRegOperandFromRegInfo(); - + OpKind = MO_Immediate; Contents.ImmVal = ImmVal; } @@ -155,7 +155,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + // If this operand is already a register operand, use setReg to update the // register's use/def lists. if (isReg()) { assert(!isEarlyClobber()); @@ -189,7 +189,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { if (getType() != Other.getType() || getTargetFlags() != Other.getTargetFlags()) return false; - + switch (getType()) { default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: @@ -322,7 +322,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { default: llvm_unreachable("Unrecognized operand type"); } - + if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; } @@ -408,7 +408,7 @@ uint64_t MachineMemOperand::getAlignment() const { raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { assert((MMO.isLoad() || MMO.isStore()) && "SV has to be a load, store or both."); - + if (MMO.isVolatile()) OS << "Volatile "; @@ -417,7 +417,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { if (MMO.isStore()) OS << "ST"; OS << MMO.getSize(); - + // Print the address information. OS << "["; if (!MMO.getValue()) @@ -464,7 +464,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// MCID NULL and no operands. MachineInstr::MachineInstr() - : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock @@ -484,8 +484,9 @@ void MachineInstr::addImplicitDefUseOperands() { /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. 
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) - : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { + unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + MCID->getNumOperands()); @@ -498,8 +499,9 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) - : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + MCID->getNumOperands()); @@ -510,13 +512,14 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, } /// MachineInstr ctor - Work exactly the same as the ctor two above, except -/// that the MachineInstr is created and added to the end of the specified +/// that the MachineInstr is created and added to the end of the specified /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) - : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + unsigned NumImplicitOps = + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -528,10 +531,11 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) - : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + unsigned NumImplicitOps = + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -542,7 +546,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -550,7 +554,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) // Add operands for (unsigned i = 0; i != MI.getNumOperands(); ++i) addOperand(MI.getOperand(i)); - NumImplicitOps = MI.NumImplicitOps; // Copy all the flags. Flags = MI.Flags; @@ -605,102 +608,74 @@ void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { /// addOperand - Add the specified operand to the instruction. 
If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list -/// (before the first implicit operand). +/// (before the first implicit operand). void MachineInstr::addOperand(const MachineOperand &Op) { + assert(MCID && "Cannot add operands before providing an instr descriptor"); bool isImpReg = Op.isReg() && Op.isImplicit(); - assert((isImpReg || !OperandsComplete()) && - "Trying to add an operand to a machine instr that is already done!"); - MachineRegisterInfo *RegInfo = getRegInfo(); - // If we are adding the operand to the end of the list, our job is simpler. - // This is true most of the time, so this is a reasonable optimization. - if (isImpReg || NumImplicitOps == 0) { - // We can only do this optimization if we know that the operand list won't - // reallocate. - if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) { - Operands.push_back(Op); - - // Set the parent of the operand. - Operands.back().ParentMI = this; - - // If the operand is a register, update the operand's use list. - if (Op.isReg()) { - Operands.back().AddRegOperandToRegInfo(RegInfo); - // If the register operand is flagged as early, mark the operand as such - unsigned OpNo = Operands.size() - 1; - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); - } - return; + // If the Operands backing store is reallocated, all register operands must + // be removed and re-added to RegInfo. It is storing pointers to operands. + bool Reallocate = RegInfo && + !Operands.empty() && Operands.size() == Operands.capacity(); + + // Find the insert location for the new operand. Implicit registers go at + // the end, everything goes before the implicit regs. + unsigned OpNo = Operands.size(); + + // Remove all the implicit operands from RegInfo if they need to be shifted. + // FIXME: Allow mixed explicit and implicit operands on inline asm. + // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as + // implicit-defs, but they must not be moved around. See the FIXME in + // InstrEmitter.cpp. + if (!isImpReg && !isInlineAsm()) { + while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { + --OpNo; + if (RegInfo) + Operands[OpNo].RemoveRegOperandFromRegInfo(); } } - - // Otherwise, we have to insert a real operand before any implicit ones. - unsigned OpNo = Operands.size()-NumImplicitOps; - // If this instruction isn't embedded into a function, then we don't need to - // update any operand lists. - if (RegInfo == 0) { - // Simple insertion, no reginfo update needed for other register operands. - Operands.insert(Operands.begin()+OpNo, Op); - Operands[OpNo].ParentMI = this; - - // Do explicitly set the reginfo for this operand though, to ensure the - // next/prev fields are properly nulled out. - if (Operands[OpNo].isReg()) { - Operands[OpNo].AddRegOperandToRegInfo(0); - // If the register operand is flagged as early, mark the operand as such - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); - } + // OpNo now points as the desired insertion point. Unless this is a variadic + // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). 
+ assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) && + "Trying to add an operand to a machine instr that is already done!"); - } else if (Operands.size()+1 <= Operands.capacity()) { - // Otherwise, we have to remove register operands from their register use - // list, add the operand, then add the register operands back to their use - // list. This also must handle the case when the operand list reallocates - // to somewhere else. - - // If insertion of this operand won't cause reallocation of the operand - // list, just remove the implicit operands, add the operand, then re-add all - // the rest of the operands. - for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { - assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].RemoveRegOperandFromRegInfo(); - } - - // Add the operand. If it is a register, add it to the reg list. - Operands.insert(Operands.begin()+OpNo, Op); - Operands[OpNo].ParentMI = this; - - if (Operands[OpNo].isReg()) { - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); - // If the register operand is flagged as early, mark the operand as such - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); - } - - // Re-add all the implicit ops. - for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { + // All operands from OpNo have been removed from RegInfo. If the Operands + // backing store needs to be reallocated, we also need to remove any other + // register operands. + if (Reallocate) + for (unsigned i = 0; i != OpNo; ++i) + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + + // Insert the new operand at OpNo. + Operands.insert(Operands.begin() + OpNo, Op); + Operands[OpNo].ParentMI = this; + + // The Operands backing store has now been reallocated, so we can re-add the + // operands before OpNo. + if (Reallocate) + for (unsigned i = 0; i != OpNo; ++i) + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(RegInfo); + + // When adding a register operand, tell RegInfo about it. + if (Operands[OpNo].isReg()) { + // Add the new operand to RegInfo, even when RegInfo is NULL. + // This will initialize the linked list pointers. + Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + + // Re-add all the implicit ops. + if (RegInfo) { + for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); Operands[i].AddRegOperandToRegInfo(RegInfo); } - } else { - // Otherwise, we will be reallocating the operand list. Remove all reg - // operands from their list, then readd them after the operand list is - // reallocated. - RemoveRegOperandsFromUseLists(); - - Operands.insert(Operands.begin()+OpNo, Op); - Operands[OpNo].ParentMI = this; - - // Re-add all the operands. - AddRegOperandsToUseLists(*RegInfo); - - // If the register operand is flagged as early, mark the operand as such - if (Operands[OpNo].isReg() - && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); } } @@ -709,13 +684,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); - + // Special case removing the last one. 
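The new addOperand() above is built around one invariant: the register use/def lists in MachineRegisterInfo hold raw pointers into the Operands vector, so any insertion that may move operands has to unregister those pointers first and re-register them afterwards. A stripped-down, standalone illustration of that pattern; Operand, Index, and insertOperand are invented names, not LLVM API:

#include <cassert>
#include <set>
#include <vector>

struct Operand { int Reg; };

// Stand-in for MachineRegisterInfo: an index of pointers into the vector.
struct Index {
  std::set<Operand*> Live;
  void add(Operand *O)    { Live.insert(O); }
  void remove(Operand *O) { assert(Live.count(O)); Live.erase(O); }
};

// Insert Op at position Pos, keeping Idx's pointers valid. Operands at or
// after Pos always move; if the backing store reallocates, all of them move.
void insertOperand(std::vector<Operand> &Ops, Index &Idx,
                   size_t Pos, const Operand &Op) {
  bool Reallocate = Ops.size() == Ops.capacity();
  size_t First = Reallocate ? 0 : Pos;

  for (size_t i = First; i != Ops.size(); ++i)
    Idx.remove(&Ops[i]);                 // unregister pointers that will move

  Ops.insert(Ops.begin() + Pos, Op);

  for (size_t i = First; i != Ops.size(); ++i)
    Idx.add(&Ops[i]);                    // re-register, including the new one
}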
if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. if (Operands.back().isReg() && Operands.back().isOnRegUseList()) Operands.back().RemoveRegOperandFromRegInfo(); - + Operands.pop_back(); return; } @@ -730,7 +705,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { Operands[i].RemoveRegOperandFromRegInfo(); } } - + Operands.erase(Operands.begin()+OpNo); if (RegInfo) { @@ -827,15 +802,6 @@ void MachineInstr::eraseFromParent() { } -/// OperandComplete - Return true if it's illegal to add a new operand -/// -bool MachineInstr::OperandsComplete() const { - unsigned short NumOperands = MCID->getNumOperands(); - if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) - return true; // Broken: we have all the operands of this instruction! - return false; -} - /// getNumExplicitOperands - Returns the number of non-implicit operands. /// unsigned MachineInstr::getNumExplicitOperands() const { @@ -860,6 +826,67 @@ bool MachineInstr::isStackAligningInlineAsm() const { return false; } +int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, + unsigned *GroupNo) const { + assert(isInlineAsm() && "Expected an inline asm instruction"); + assert(OpIdx < getNumOperands() && "OpIdx out of range"); + + // Ignore queries about the initial operands. + if (OpIdx < InlineAsm::MIOp_FirstOperand) + return -1; + + unsigned Group = 0; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + // If we reach the implicit register operands, stop looking. + if (!FlagMO.isImm()) + return -1; + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + if (i + NumOps > OpIdx) { + if (GroupNo) + *GroupNo = Group; + return i; + } + ++Group; + } + return -1; +} + +const TargetRegisterClass* +MachineInstr::getRegClassConstraint(unsigned OpIdx, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) const { + // Most opcodes have fixed constraints in their MCInstrDesc. + if (!isInlineAsm()) + return TII->getRegClass(getDesc(), OpIdx, TRI); + + if (!getOperand(OpIdx).isReg()) + return NULL; + + // For tied uses on inline asm, get the constraint from the def. + unsigned DefIdx; + if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx)) + OpIdx = DefIdx; + + // Inline asm stores register class constraints in the flag word. + int FlagIdx = findInlineAsmFlagIdx(OpIdx); + if (FlagIdx < 0) + return NULL; + + unsigned Flag = getOperand(FlagIdx).getImm(); + unsigned RCID; + if (InlineAsm::hasRegClassConstraint(Flag, RCID)) + return TRI->getRegClass(RCID); + + // Assume that all registers in a memory operand are pointers. + if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) + return TRI->getPointerRegClass(); + + return NULL; +} + /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -901,7 +928,8 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, Ops->push_back(i); if (MO.isUse()) Use |= !MO.isUndef(); - else if (MO.getSubReg()) + else if (MO.getSubReg() && !MO.isUndef()) + // A partial <def,undef> doesn't count as reading the register. PartDef = true; else FullDef = true; @@ -941,6 +969,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, /// operand list that is used to represent the predicate. It returns -1 if /// none is found. 
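The findInlineAsmFlagIdx() helper introduced above depends on the inline-asm operand layout: after the fixed leading operands, the list is a sequence of groups, each an immediate flag operand followed by the register operands it describes, with the register count encoded in the flag. A standalone model of that walk; Op and findGroupStart are invented for illustration, and FirstOperand merely stands in for InlineAsm::MIOp_FirstOperand:

#include <vector>

struct Op {
  bool IsImm;        // true for a flag operand
  unsigned NumRegs;  // for a flag: number of register operands that follow
};

// Return the index of the flag operand whose group contains OpIdx, or -1.
int findGroupStart(const std::vector<Op> &Ops, unsigned OpIdx,
                   unsigned FirstOperand = 2) {
  if (OpIdx < FirstOperand)
    return -1;                          // queries about the fixed operands
  for (unsigned i = FirstOperand; i < Ops.size();) {
    if (!Ops[i].IsImm)
      return -1;                        // reached trailing implicit operands
    unsigned NumOps = 1 + Ops[i].NumRegs;
    if (i + NumOps > OpIdx)
      return static_cast<int>(i);       // OpIdx falls inside this group
    i += NumOps;
  }
  return -1;
}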
int MachineInstr::findFirstPredOperandIdx() const { + // Don't call MCID.findFirstPredOperandIdx() because this variant + // is sometimes called on an instruction that's not yet complete, and + // so the number of operands is less than the MCID indicates. In + // particular, the PTX target does this. const MCInstrDesc &MCID = getDesc(); if (MCID.isPredicable()) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) @@ -950,7 +982,7 @@ int MachineInstr::findFirstPredOperandIdx() const { return -1; } - + /// isRegTiedToUseOperand - Given the index of a register def operand, /// check if the register def is tied to a source operand, due to either /// two-address elimination or inline assembly constraints. Returns the @@ -964,23 +996,13 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; - unsigned DefPart = 0; - for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); - i < e; ) { - const MachineOperand &FMO = getOperand(i); - // After the normal asm operands there may be additional imp-def regs. - if (!FMO.isImm()) - return false; - // Skip over this def. - unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm()); - unsigned PrevDef = i + 1; - i = PrevDef + NumOps; - if (i > DefOpIdx) { - DefPart = DefOpIdx - PrevDef; - break; - } - ++DefNo; - } + int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo); + if (FlagIdx < 0) + return false; + + // Which part of the group is DefOpIdx? + unsigned DefPart = DefOpIdx - (FlagIdx + 1); + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i != e; ++i) { const MachineOperand &FMO = getOperand(i); @@ -1024,20 +1046,10 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { return false; // Find the flag operand corresponding to UseOpIdx - unsigned FlagIdx, NumOps=0; - for (FlagIdx = InlineAsm::MIOp_FirstOperand; - FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { - const MachineOperand &UFMO = getOperand(FlagIdx); - // After the normal asm operands there may be additional imp-def regs. - if (!UFMO.isImm()) - return false; - NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm()); - assert(NumOps < getNumOperands() && "Invalid inline asm flag"); - if (UseOpIdx < FlagIdx+NumOps+1) - break; - } - if (FlagIdx >= UseOpIdx) + int FlagIdx = findInlineAsmFlagIdx(UseOpIdx); + if (FlagIdx < 0) return false; + const MachineOperand &UFMO = getOperand(FlagIdx); unsigned DefNo; if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { @@ -1211,7 +1223,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { // conservatively assume it wasn't preserved. if (memoperands_empty()) return true; - + // Check the memory reference information for volatile references. for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) if ((*I)->isVolatile()) @@ -1318,7 +1330,7 @@ void MachineInstr::dump() const { dbgs() << " " << *this; } -static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, +static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, raw_ostream &CommentOS) { const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. @@ -1380,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { unsigned AsmDescOp = ~0u; unsigned AsmOpCount = 0; - if (isInlineAsm()) { + if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. 
OS << " "; getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); @@ -1451,18 +1463,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << '$' << AsmOpCount++; unsigned Flag = MO.getImm(); switch (InlineAsm::getKind(Flag)) { - case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break; - case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break; - case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break; - case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break; - case InlineAsm::Kind_Imm: OS << ":[imm]"; break; - case InlineAsm::Kind_Mem: OS << ":[mem]"; break; - default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break; + case InlineAsm::Kind_RegUse: OS << ":[reguse"; break; + case InlineAsm::Kind_RegDef: OS << ":[regdef"; break; + case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break; + case InlineAsm::Kind_Clobber: OS << ":[clobber"; break; + case InlineAsm::Kind_Imm: OS << ":[imm"; break; + case InlineAsm::Kind_Mem: OS << ":[mem"; break; + default: OS << ":[??" << InlineAsm::getKind(Flag); break; + } + + unsigned RCID = 0; + if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { + if (TM) + OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName(); + else + OS << ":RC" << RCID; } unsigned TiedTo = 0; if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) - OS << " [tiedto:$" << TiedTo << ']'; + OS << " tiedto:$" << TiedTo; + + OS << ']'; // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); @@ -1516,7 +1538,19 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } // Print debug location information. - if (!debugLoc.isUnknown() && MF) { + if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + DIVariable DV(getOperand(e - 1).getMetadata()); + OS << " line no:" << DV.getLineNumber(); + if (MDNode *InlinedAt = DV.getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " inlined @[ "; + printDebugLoc(InlinedAtDL, MF, OS); + OS << " ]"; + } + } + } else if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); @@ -1627,7 +1661,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, // new implicit operand if required. if (Found || !AddIfNotFound) return Found; - + addOperand(MachineOperand::CreateReg(IncomingReg, true /*IsDef*/, true /*IsImp*/, diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 722ceb2..a1f80d5 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -37,10 +37,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> +AvoidSpeculation("avoid-speculation", + cl::desc("MachineLICM should avoid speculation"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -91,6 +97,17 @@ namespace { // For each opcode, keep a list of potential CSE instructions. 
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; + enum { + SpeculateFalse = 0, + SpeculateTrue = 1, + SpeculateUnknown = 2 + }; + + // If a MBB does not dominate loop exiting blocks then it may not safe + // to hoist loads from this block. + // Tri-state: 0 - false, 1 - true, 2 - unknown + unsigned SpeculationState; + public: static char ID; // Pass identification, replacement for typeid MachineLICM() : @@ -194,6 +211,10 @@ namespace { /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); + /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. + /// If not then a load from this mbb may not be safe to hoist. + bool IsGuaranteedToExecute(MachineBasicBlock *BB); + /// HoistRegion - Walk the specified region of the CFG (defined by all /// blocks dominated by the specified block, and that are in the current /// loop) in depth first order w.r.t the DominatorTree. This allows us to @@ -202,6 +223,13 @@ namespace { /// void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + /// getRegisterClassIDAndCost - For a given MI, register, and the operand + /// index, return the ID and cost of its representative register class by + /// reference. + void getRegisterClassIDAndCost(const MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + unsigned &RCId, unsigned &RCCost) const; + /// InitRegPressure - Find all virtual register references that are liveout /// of the preheader to initialize the starting "register pressure". Note /// this does not count live through (livein but not used) registers. @@ -229,6 +257,10 @@ namespace { bool EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); + /// MayCSE - Return true if the given instruction will be CSE'd if it's + /// hoisted out of the loop. + bool MayCSE(MachineInstr *MI); + /// Hoist - When an instruction is found to only use loop invariant operands /// that is safe to hoist, this instruction is called to do the dirty work. /// It returns true if the instruction is hoisted. @@ -441,6 +473,12 @@ void MachineLICM::HoistRegionPostRA() { const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) continue; + // Conservatively treat live-in's as an external def. // FIXME: That means a reload that're reused in successor block(s) will not // be LICM'ed. @@ -452,6 +490,7 @@ void MachineLICM::HoistRegionPostRA() { ++PhysRegDefs[*AS]; } + SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ++MII) { MachineInstr *MI = &*MII; @@ -545,6 +584,27 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } +// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. +// If not then a load from this mbb may not be safe to hoist. +bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { + if (SpeculationState != SpeculateUnknown) + return SpeculationState == SpeculateFalse; + + if (BB != CurLoop->getHeader()) { + // Check loop exiting blocks. 
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; + CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); + for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) + if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { + SpeculationState = SpeculateTrue; + return false; + } + } + + SpeculationState = SpeculateFalse; + return true; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -554,6 +614,11 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { assert(N != 0 && "Null dominator tree node?"); MachineBasicBlock *BB = N->getBlock(); + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) return; + // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; @@ -571,6 +636,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { // Remember livein register pressure. BackTrace.push_back(RegPressure); + SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; @@ -596,6 +662,23 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } +/// getRegisterClassIDAndCost - For a given MI, register, and the operand +/// index, return the ID and cost of its representative register class. +void +MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + unsigned &RCId, unsigned &RCCost) const { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + if (VT == MVT::untyped) { + RCId = RC->getID(); + RCCost = 1; + } else { + RCId = TLI->getRepRegClassFor(VT)->getID(); + RCCost = TLI->getRepRegClassCostFor(VT); + } +} + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. @@ -625,18 +708,17 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { continue; bool isNew = RegSeen.insert(Reg); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); if (MO.isDef()) - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] += RCCost; else { bool isKill = isOperandKill(MO, MRI); if (isNew && !isKill) // Haven't seen this, it must be a livein. 
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] += RCCost; else if (!isNew && isKill) - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= RCCost; } } } @@ -661,11 +743,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (MO.isDef()) Defs.push_back(Reg); else if (!isNew && isOperandKill(MO, MRI)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned RCCost = TLI->getRepRegClassCostFor(VT); - + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); if (RCCost > RegPressure[RCId]) RegPressure[RCId] = 0; else @@ -673,13 +752,13 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { } } + unsigned Idx = 0; while (!Defs.empty()) { unsigned Reg = Defs.pop_back_val(); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned RCCost = TLI->getRepRegClassCostFor(VT); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); RegPressure[RCId] += RCCost; + ++Idx; } } @@ -691,7 +770,14 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { bool DontMoveAcrossStore = true; if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) return false; - + + // If it is load then check if it is guaranteed to execute by making sure that + // it dominates all exiting blocks. If it doesn't, then there is a path out of + // the loop which does not execute this load, so we can't hoist it. + // Stores and side effects are already checked by isSafeToMove. + if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent())) + return false; + return true; } @@ -879,10 +965,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned RCCost = TLI->getRepRegClassCostFor(VT); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); if (MO.isDef()) { DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); if (CI != Cost.end()) @@ -941,16 +1025,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); if (MO.isDef()) { if (HasHighOperandLatency(MI, i, Reg)) { ++NumHighLatency; return true; } - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned RCCost = TLI->getRepRegClassCostFor(VT); DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); if (CI != Cost.end()) CI->second += RCCost; @@ -960,10 +1043,6 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // Is a virtual register use is a kill, hoisting it out of the loop // may actually reduce register pressure or be register pressure // neutral. 
- const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned RCCost = TLI->getRepRegClassCostFor(VT); DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); if (CI != Cost.end()) CI->second -= RCCost; @@ -979,6 +1058,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return true; } + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) + return false; + // High register pressure situation, only hoist if the instruction is going to // be remat'ed. if (!TII->isTriviallyReMaterializable(&MI, AA) && @@ -1116,6 +1202,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } +/// MayCSE - Return true if the given instruction will be CSE'd if it's +/// hoisted out of the loop. +bool MachineLICM::MayCSE(MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == CSEMap.end() || MI->isImplicitDef()) + return false; + + return LookForDuplicate(MI, CI->second) != 0; +} + /// Hoist - When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index fadc594..80c4854 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -17,9 +17,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/Support/Dwarf.h" @@ -254,11 +252,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, - const TargetAsmInfo *TAI) -: ImmutablePass(ID), Context(MAI, TAI), - ObjFileMMI(0), - CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), - CallsExternalVAFunctionWithFloatingPointArguments(false) { + const MCRegisterInfo &MRI, + const MCObjectFileInfo *MOFI) + : ImmutablePass(ID), Context(MAI, MRI, MOFI), + ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0), + CallsUnwindInit(0), DbgInfoAvailable(false), + CallsExternalVAFunctionWithFloatingPointArguments(false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. 
Personalities.push_back(NULL); @@ -267,7 +266,8 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, } MachineModuleInfo::MachineModuleInfo() -: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) { + : ImmutablePass(ID), + Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { assert(0 && "This MachineModuleInfo constructor should never be called, MMI " "should always be explicitly constructed by LLVMTargetMachine"); abort(); @@ -311,6 +311,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; + CompactUnwindEncoding = 0; VariableDbgInfo.clear(); } @@ -426,8 +427,9 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// -void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, - std::vector<const GlobalVariable *> &TyInfo) { +void MachineModuleInfo:: +addCatchTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalVariable *> TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -435,8 +437,9 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. /// -void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, - std::vector<const GlobalVariable *> &TyInfo) { +void MachineModuleInfo:: +addFilterTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalVariable *> TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector<unsigned> IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -496,6 +499,14 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { } } +/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site +/// indexes. +void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, + ArrayRef<unsigned> Sites) { + for (unsigned I = 0, E = Sites.size(); I != E; ++I) + LPadToCallSiteMap[Sym].push_back(Sites[I]); +} + /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. 
unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 4b3e64c..266ebf6 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -14,10 +14,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { +MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) + : TRI(&TRI), IsSSA(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); @@ -48,18 +49,47 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { const TargetRegisterClass * MachineRegisterInfo::constrainRegClass(unsigned Reg, - const TargetRegisterClass *RC) { + const TargetRegisterClass *RC, + unsigned MinNumRegs) { const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC); - if (!NewRC) + const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + if (!NewRC || NewRC == OldRC) + return NewRC; + if (NewRC->getNumRegs() < MinNumRegs) return 0; - if (NewRC != OldRC) - setRegClass(Reg, NewRC); + setRegClass(Reg, NewRC); return NewRC; } +bool +MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterClass *OldRC = getRegClass(Reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + + // Stop early if there is no room to grow. + if (NewRC == OldRC) + return false; + + // Accumulate constraints from all uses. + for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; + ++I) { + // TRI doesn't have accurate enough information to model this yet. + if (I.getOperand().getSubReg()) + return false; + const TargetRegisterClass *OpRC = + I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + if (OpRC) + NewRC = TRI->getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return false; + } + setRegClass(Reg, NewRC); + return true; +} + /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. /// diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 916dff7..29cfb49 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -382,6 +382,25 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } +/// collectDebgValues - Scan instructions following MI and collect any +/// matching DBG_VALUEs. 
+static void collectDebugValues(MachineInstr *MI, + SmallVector<MachineInstr *, 2> & DbgValues) { + DbgValues.clear(); + if (!MI->getOperand(0).isReg()) + return; + + MachineBasicBlock::iterator DI = MI; ++DI; + for (MachineBasicBlock::iterator DE = MI->getParent()->end(); + DI != DE; ++DI) { + if (!DI->isDebugValue()) + return; + if (DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() == MI->getOperand(0).getReg()) + DbgValues.push_back(DI); + } +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { @@ -598,10 +617,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; + // collect matching debug values. + SmallVector<MachineInstr *, 2> DbgValuesToSink; + collectDebugValues(MI, DbgValuesToSink); + // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + // Move debug values. + for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { + MachineInstr *DbgMI = *DBI; + SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, + ++MachineBasicBlock::iterator(DbgMI)); + } + // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. MI->clearKillInfo(); diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 7a55852..26847d3 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -72,6 +72,8 @@ namespace { typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + const MachineInstr *FirstTerminator; + BitVector regsReserved; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; @@ -389,6 +391,8 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { + FirstTerminator = 0; + // Count the number of landing pad successors. SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -570,6 +574,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } + // Ensure non-terminators don't follow terminators. + if (MCID.isTerminator()) { + if (!FirstTerminator) + FirstTerminator = MI; + } else if (FirstTerminator) { + report("Non-terminator instruction after the first terminator", MI); + *OS << "First terminator was:\t" << *FirstTerminator; + } + + StringRef ErrorInfo; + if (!TII->verifyInstruction(MI, ErrorInfo)) + report(ErrorInfo.data(), MI); } void @@ -686,6 +702,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else addRegWithSubRegs(regsDefined, Reg); + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + // Check LiveInts for a live range, but only for virtual registers. 
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { @@ -714,20 +735,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - unsigned sr = Reg; if (SubIdx) { - unsigned s = TRI->getSubReg(Reg, SubIdx); - if (!s) { - report("Invalid subregister index for physical register", - MO, MONum); - return; - } - sr = s; + report("Illegal subregister index for physical register", MO, MONum); + return; } if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { - if (!DRC->contains(sr)) { + if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - *OS << TRI->getName(sr) << " is not a " + *OS << TRI->getName(Reg) << " is not a " << DRC->getName() << " register.\n"; } } @@ -735,16 +750,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Virtual register. const TargetRegisterClass *RC = MRI->getRegClass(Reg); if (SubIdx) { - const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx); + const TargetRegisterClass *SRC = + TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); *OS << "Register class " << RC->getName() << " does not support subreg index " << SubIdx << "\n"; return; } - RC = SRC; + if (RC != SRC) { + report("Invalid register class for subregister index", MO, MONum); + *OS << "Register class " << RC->getName() + << " does not fully support subreg index " << SubIdx << "\n"; + return; + } } if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { + if (SubIdx) { + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(RC); + if (!SuperRC) { + report("No largest legal super class exists.", MO, MONum); + return; + } + DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx); + if (!DRC) { + report("No matching super-reg register class.", MO, MONum); + return; + } + } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -1161,18 +1195,8 @@ void MachineVerifier::verifyLiveIntervals() { SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); const VNInfo *PVNI = LI.getVNInfoAt(PEnd); - if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) { - if (PVNI && !PVNI->hasPHIKill()) { - report("Value live out of predecessor doesn't have PHIKill", MF); - *OS << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << " doesn't have PHIKill, but Valno #" << VNI->id - << " is PHIDef and defined at the beginning of BB#" - << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) - << " in " << LI << '\n'; - } + if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; - } if (!PVNI) { report("Register not marked live out of predecessor", *PI); diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index af65f13..6994aa5 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -109,6 +109,9 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; + // This pass takes the function out of SSA form. 
+ MRI->leaveSSA(); + // Split critical edges to help the coalescer if (!DisableEdgeSplitting) { if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index c523e39..bbc7ce2 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -295,7 +295,6 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, if (!DefMI || !DefMI->getDesc().isBitcast()) return false; - unsigned SrcDef = 0; unsigned SrcSrc = 0; NumDefs = DefMI->getDesc().getNumDefs(); NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; @@ -308,13 +307,13 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!Reg) continue; - if (MO.isDef()) - SrcDef = Reg; - else if (SrcSrc) - // Multiple sources? - return false; - else - SrcSrc = Reg; + if (!MO.isDef()) { + if (SrcSrc) + // Multiple sources? + return false; + else + SrcSrc = Reg; + } } if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) @@ -434,6 +433,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MCID.isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. + LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; @@ -441,6 +441,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { } else if (MCID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. + LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index c04d656..b1d8c97 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -125,8 +125,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } + unsigned Reg = MI->getOperand(0).getReg(); MI->eraseFromParent(); Changed = true; + + // A REG_SEQUENCE may have been expanded into partial definitions. + // If this was the last one, mark Reg as implicitly defined. + if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg)) + ImpDefRegs.insert(Reg); continue; } } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index a901c5f..32c9325 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -54,6 +55,8 @@ INITIALIZE_PASS_END(PEI, "prologepilog", STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); +STATISTIC(NumBytesStackSpace, + "Number of bytes used for stack in all functions"); /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. @@ -677,7 +680,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Update frame info to pretend that this is part of the stack... 
- MFI->setStackSize(Offset - LocalAreaOffset); + int64_t StackSize = Offset - LocalAreaOffset; + MFI->setStackSize(StackSize); + NumBytesStackSpace += StackSize; } /// insertPrologEpilogCode - Scan the function for modified callee saved @@ -696,6 +701,13 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { if (!I->empty() && I->back().getDesc().isReturn()) TFI.emitEpilogue(Fn, *I); } + + // Emit additional code that is required to support segmented stacks, if + // we've been asked for it. This, when linked with a runtime with support + // for segmented stacks (libgcc is one), will result in allocating stack + // space in small chunks instead of one large contiguous block. + if (EnableSegmentedStacks) + TFI.adjustForSegmentedStacks(Fn); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 5ea26ad..5496d69 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -20,7 +20,6 @@ #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" -#include "RegisterCoalescer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -160,7 +159,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -439,6 +438,7 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); MachineFunction::iterator MBB = llvm::next(MF->begin()); MachineFunction::iterator MFE = MF->end(); SlotIndex Start, Stop; @@ -449,6 +449,8 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { if (SI.start() <= Start) { if (!MBB->isLiveIn(PhysReg)) MBB->addLiveIn(PhysReg); + DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' + << PrintReg(SI.value()->reg, TRI)); } else if (SI.start() > Stop) MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); if (++MBB == MFE) @@ -456,6 +458,7 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { tie(Start, Stop) = Indexes->getMBBRange(MBB); SI.advanceTo(Start); } + DEBUG(dbgs() << '\n'); } } @@ -495,8 +498,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, // Found an available register. return PhysReg; } + Queries[interfReg].collectInterferingVRegs(1); LiveInterval *interferingVirtReg = - Queries[interfReg].firstInterference().liveUnionPos().value(); + Queries[interfReg].interferingVRegs().front(); // The current VirtReg must either be spillable, or one of its interferences // must have less spill weight. 
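A note on the RegAllocBasic.cpp hunk above: instead of peeking at the live-union position directly, selectOrSplit now asks the query to collect at most one interfering virtual register and takes the front of that list, and the surrounding comment states the eviction rule: the current virtual register must either be spillable, or one of its interferences must have a lower spill weight. The sketch below illustrates that spill-weight eviction rule in isolation; the types and names (LiveRange, PhysRegState, pickEvictableReg) are invented for the example and are not the LLVM LiveIntervalUnion API.

#include <algorithm>
#include <cstdio>
#include <vector>

struct LiveRange {
  unsigned Reg;      // virtual register id
  float SpillWeight; // higher means more expensive to spill
};

struct PhysRegState {
  unsigned PhysReg;
  std::vector<LiveRange> Interferences; // ranges currently assigned here
};

// Return the first physreg in allocation order whose heaviest interference is
// still lighter than VirtReg, or 0 if none qualifies (the caller would then
// spill VirtReg itself).
unsigned pickEvictableReg(const LiveRange &VirtReg,
                          const std::vector<PhysRegState> &Order) {
  for (const PhysRegState &P : Order) {
    float MaxWeight = 0.0f;
    for (const LiveRange &Intf : P.Interferences)
      MaxWeight = std::max(MaxWeight, Intf.SpillWeight);
    if (MaxWeight < VirtReg.SpillWeight)
      return P.PhysReg; // evict everything on P and assign VirtReg there
  }
  return 0;
}

int main() {
  std::vector<PhysRegState> Order = {
      {1, {{10, 5.0f}}},              // physreg 1 holds a heavy range
      {2, {{11, 0.5f}, {12, 0.25f}}}, // physreg 2 holds only light ranges
  };
  LiveRange VirtReg{20, 2.0f};
  std::printf("evict on physreg %u\n", pickEvictableReg(VirtReg, Order));
  return 0;
}

Compiled standalone, the example evicts the light ranges on physreg 2 rather than the heavy range on physreg 1, which is the behaviour the comment in the hunk describes.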
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index e235e87..f54a2c8 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -22,7 +22,6 @@ #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" -#include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -38,6 +37,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -51,6 +51,15 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); +static cl::opt<SplitEditor::ComplementSpillMode> +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -90,12 +99,26 @@ class RAGreedy : public MachineFunctionPass, // range splitting algorithm terminates, something that is otherwise hard to // ensure. enum LiveRangeStage { - RS_New, ///< Never seen before. - RS_First, ///< First time in the queue. - RS_Second, ///< Second time in the queue. - RS_Global, ///< Produced by global splitting. - RS_Local, ///< Produced by local splitting. - RS_Spill ///< Produced by spilling. + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. + RS_Spill, + + /// There is nothing more we can do to this live range. Abort compilation + /// if it can't be assigned. + RS_Done }; static const char *const StageName[]; @@ -157,17 +180,38 @@ class RAGreedy : public MachineFunctionPass, /// Global live range splitting candidate info. struct GlobalSplitCandidate { + // Register intended for assignment, or 0. unsigned PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. BitVector LiveBundles; SmallVector<unsigned, 8> ActiveBlocks; void reset(InterferenceCache &Cache, unsigned Reg) { PhysReg = Reg; + IntvIdx = 0; Intf.setPhysReg(Cache, Reg); LiveBundles.clear(); ActiveBlocks.clear(); } + + // Set B[i] = C for every live bundle where B[i] was NoCand. + unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { + unsigned Count = 0; + for (int i = LiveBundles.find_first(); i >= 0; + i = LiveBundles.find_next(i)) + if (B[i] == NoCand) { + B[i] = C; + Count++; + } + return Count; + } }; /// Candidate info for for each PhysReg in AllocationOrder. 
@@ -175,6 +219,12 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector<GlobalSplitCandidate, 32> GlobalCand; + enum { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector<unsigned, 32> BundleCand; + public: RAGreedy(); @@ -208,8 +258,8 @@ private: void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); float calcGlobalSplitCost(GlobalSplitCandidate&); - void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, - SmallVectorImpl<LiveInterval*>&); + bool calcCompactRegion(GlobalSplitCandidate&); + void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); @@ -222,6 +272,8 @@ private: SmallVectorImpl<LiveInterval*>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); + unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned trySplit(LiveInterval&, AllocationOrder&, @@ -233,12 +285,12 @@ char RAGreedy::ID = 0; #ifndef NDEBUG const char *const RAGreedy::StageName[] = { - "RS_New", - "RS_First", - "RS_Second", - "RS_Global", - "RS_Local", - "RS_Spill" + "RS_New", + "RS_Assign", + "RS_Split", + "RS_Split2", + "RS_Spill", + "RS_Done" }; #endif @@ -278,7 +330,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -325,9 +377,15 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // Cloning a register we haven't even heard about yet? Just ignore it. + if (!ExtraRegInfo.inBounds(Old)) + return; + // LRE may clone a virtual register because dead code elimination causes it to - // be split into connected components. Ensure that the new register gets the + // be split into connected components. The new components are much smaller + // than the original, so they should get a new chance at being assigned. // same stage as the parent. + ExtraRegInfo[Old].Stage = RS_Assign; ExtraRegInfo.grow(New); ExtraRegInfo[New] = ExtraRegInfo[Old]; } @@ -350,16 +408,15 @@ void RAGreedy::enqueue(LiveInterval *LI) { ExtraRegInfo.grow(Reg); if (ExtraRegInfo[Reg].Stage == RS_New) - ExtraRegInfo[Reg].Stage = RS_First; + ExtraRegInfo[Reg].Stage = RS_Assign; - if (ExtraRegInfo[Reg].Stage == RS_Second) + if (ExtraRegInfo[Reg].Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until - // everything else has been allocated. Long ranges are allocated last so - // they are split against realistic interference. - Prio = (1u << 31) - Size; - else { - // Everything else is allocated in long->short order. Long ranges that don't - // fit should be spilled ASAP so they don't create interference. + // everything else has been allocated. + Prio = Size; + } else { + // Everything is allocated in long->short order. 
Long ranges that don't fit + // should be spilled (or split) ASAP so they don't create interference. Prio = (1u << 31) + Size; // Boost ranges that have a physical register hint. @@ -442,7 +499,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, /// @param BreaksHint True when B is already assigned to its preferred register. bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, LiveInterval &B, bool BreaksHint) { - bool CanSplit = getStage(B) <= RS_Second; + bool CanSplit = getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be // split. @@ -487,7 +544,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; // Never evict spill products. They cannot split or spill. - if (getStage(*Intf) == RS_Spill) + if (getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These @@ -627,6 +684,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.ChangesValue = BI.FirstDef; if (!Intf.hasInterference()) continue; @@ -638,9 +696,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, if (BI.LiveIn) { if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) BC.Entry = SpillPlacement::MustSpill, ++Ins; - else if (Intf.first() < BI.FirstUse) + else if (Intf.first() < BI.FirstInstr) BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < BI.LastUse) + else if (Intf.first() < BI.LastInstr) ++Ins; } @@ -648,9 +706,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, if (BI.LiveOut) { if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) BC.Exit = SpillPlacement::MustSpill, ++Ins; - else if (Intf.last() > BI.LastUse) + else if (Intf.last() > BI.LastInstr) BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > BI.FirstUse) + else if (Intf.last() > BI.FirstInstr) ++Ins; } @@ -684,7 +742,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, assert(T < GroupSize && "Array overflow"); TBS[T] = Number; if (++T == GroupSize) { - SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); T = 0; } continue; @@ -714,7 +772,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); SpillPlacer->addConstraints(Array); - SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); } void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { @@ -749,8 +807,16 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Any new blocks to add? if (ActiveBlocks.size() == AddedTo) break; - addThroughConstraints(Cand.Intf, - ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo)); + + // Compute through constraints from the interference, or assume that all + // through blocks prefer spilling when forming compact regions. + ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + if (Cand.PhysReg) + addThroughConstraints(Cand.Intf, NewBlocks); + else + // Provide a strong negative bias on through blocks to prevent unwanted + // liveness on loop backedges. 
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); AddedTo = ActiveBlocks.size(); // Perhaps iterating can enable more bundles? @@ -759,11 +825,55 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { DEBUG(dbgs() << ", v=" << Visited); } +/// calcCompactRegion - Compute the set of edge bundles that should be live +/// when splitting the current live range into compact regions. Compact +/// regions can be computed without looking at interference. They are the +/// regions formed by removing all the live-through blocks from the live range. +/// +/// Returns false if the current live range is already compact, or if the +/// compact regions would form single block regions anyway. +bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { + // Without any through blocks, the live range is already compact. + if (!SA->getNumThroughBlocks()) + return false; + + // Compact regions don't correspond to any physreg. + Cand.reset(IntfCache, 0); + + DEBUG(dbgs() << "Compact region bundles"); + + // Use the spill placer to determine the live bundles. GrowRegion pretends + // that all the through blocks have interference when PhysReg is unset. + SpillPlacer->prepare(Cand.LiveBundles); + + // The static split cost will be zero since Cand.Intf reports no interference. + float Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + growRegion(Cand); + SpillPlacer->finish(); + + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + DEBUG({ + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + return true; +} + /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. float RAGreedy::calcSpillCost() { float Cost = 0; - const LiveInterval &LI = SA->getParent(); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -772,16 +882,8 @@ float RAGreedy::calcSpillCost() { Cost += SpillPlacer->getBlockFrequency(Number); // Unless the value is redefined in the block. - if (BI.LiveIn && BI.LiveOut) { - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(Number); - LiveInterval::const_iterator I = LI.find(Start); - assert(I != LI.end() && "Expected live-in value"); - // Is there a different live-out value? If so, we need an extra spill - // instruction. - if (I->end < Stop) - Cost += SpillPlacer->getBlockFrequency(Number); - } + if (BI.LiveIn && BI.LiveOut && BI.FirstDef) + Cost += SpillPlacer->getBlockFrequency(Number); } return Cost; } @@ -828,81 +930,115 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { return GlobalCost; } -/// splitAroundRegion - Split VirtReg around the region determined by -/// LiveBundles. Make an effort to avoid interference from PhysReg. +/// splitAroundRegion - Split the current live range around the regions +/// determined by BundleCand and GlobalCand. /// -/// The 'register' interval is going to contain as many uses as possible while -/// avoiding interference. The 'stack' interval is the complement constructed by -/// SplitEditor. It will contain the rest. +/// Before calling this function, GlobalCand and BundleCand must be initialized +/// so each bundle is assigned to a valid candidate, or NoCand for the +/// stack-bound bundles. 
The shared SA/SE SplitAnalysis and SplitEditor +/// objects must be initialized for the current live range, and intervals +/// created for the used candidates. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, - GlobalSplitCandidate &Cand, - SmallVectorImpl<LiveInterval*> &NewVRegs) { - const BitVector &LiveBundles = Cand.LiveBundles; - - DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) - << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) - dbgs() << " EB#" << i; - dbgs() << ".\n"; - }); - - InterferenceCache::Cursor &Intf = Cand.Intf; - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - - // Create the main cross-block interval. - const unsigned MainIntv = SE->openIntv(); +/// @param LREdit The LiveRangeEdit object handling the current split. +/// @param UsedCands List of used GlobalCand entries. Every BundleCand value +/// must appear in this list. +void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, + ArrayRef<unsigned> UsedCands) { + // These are the intervals created for new global ranges. We may create more + // intervals for local ranges. + const unsigned NumGlobalIntvs = LREdit.size(); + DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); + assert(NumGlobalIntvs && "No global intervals configured"); + + // Isolate even single instructions when dealing with a proper sub-class. + // That guarantees register class inflation for the stack interval because it + // is all copies. + unsigned Reg = SA->getParent().reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); // First handle all the blocks with uses. ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = BI.LiveIn && - LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = BI.LiveOut && - LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + unsigned Number = BI.MBB->getNumber(); + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + if (BI.LiveIn) { + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } + } + if (BI.LiveOut) { + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); + } + } // Create separate intervals for isolated blocks with multiple uses. - if (!RegIn && !RegOut) { + if (!IntvIn && !IntvOut) { DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); - if (!BI.isOneInstr()) { + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) SE->splitSingleBlock(BI); - SE->selectIntv(MainIntv); - } continue; } - Intf.moveToBlock(BI.MBB->getNumber()); - - if (RegIn && RegOut) - SE->splitLiveThroughBlock(BI.MBB->getNumber(), - MainIntv, Intf.first(), - MainIntv, Intf.last()); - else if (RegIn) - SE->splitRegInBlock(BI, MainIntv, Intf.first()); + if (IntvIn && IntvOut) + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + else if (IntvIn) + SE->splitRegInBlock(BI, IntvIn, IntfIn); else - SE->splitRegOutBlock(BI, MainIntv, Intf.last()); + SE->splitRegOutBlock(BI, IntvOut, IntfOut); } - // Handle live-through blocks. 
- for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { - unsigned Number = Cand.ActiveBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - if (!RegIn && !RegOut) - continue; - Intf.moveToBlock(Number); - SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(), - RegOut ? MainIntv : 0, Intf.last()); + // Handle live-through blocks. The relevant live-through blocks are stored in + // the ActiveBlocks list with each candidate. We need to filter out + // duplicates. + BitVector Todo = SA->getThroughBlocks(); + for (unsigned c = 0; c != UsedCands.size(); ++c) { + ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + unsigned Number = Blocks[i]; + if (!Todo.test(Number)) + continue; + Todo.reset(Number); + + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } + + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); + } + if (!IntvIn && !IntvOut) + continue; + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + } } ++NumGlobalSplits; SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs()); ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); @@ -922,18 +1058,18 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. if (IntvMap[i] == 0) { - setStage(Reg, RS_Global); + setStage(Reg, RS_Spill); continue; } - // Main interval. Allow repeated splitting as long as the number of live + // Global intervals. Allow repeated splitting as long as the number of live // blocks is strictly decreasing. - if (IntvMap[i] == MainIntv) { + if (IntvMap[i] < NumGlobalIntvs) { if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. - setStage(Reg, RS_Global); + setStage(Reg, RS_Split2); } continue; } @@ -948,11 +1084,23 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { - float BestCost = Hysteresis * calcSpillCost(); - DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); - const unsigned NoCand = ~0u; - unsigned BestCand = NoCand; unsigned NumCands = 0; + unsigned BestCand = NoCand; + float BestCost; + SmallVector<unsigned, 8> UsedCands; + + // Check if we can split this live range around a compact region. + bool HasCompact = calcCompactRegion(GlobalCand.front()); + if (HasCompact) { + // Yes, keep GlobalCand[0] as the compact region candidate. + NumCands = 1; + BestCost = HUGE_VALF; + } else { + // No benefit from the compact region, our fallback will be per-block + // splitting. Make sure we find a solution that is cheaper than spilling. 
+ BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + } Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -962,7 +1110,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned WorstCount = ~0u; unsigned Worst = 0; for (unsigned i = 0; i != NumCands; ++i) { - if (i == BestCand) + if (i == BestCand || !GlobalCand[i].PhysReg) continue; unsigned Count = GlobalCand[i].LiveBundles.count(); if (Count < WorstCount) @@ -1019,15 +1167,94 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, ++NumCands; } - if (BestCand == NoCand) + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); + // Prepare split editor. + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit, SplitSpillMode); + + // Assign all edge bundles to the preferred candidate, or NoCand. + BundleCand.assign(Bundles->getNumBundles(), NoCand); + + // Assign bundles for the best candidate region. + if (BestCand != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[BestCand]; + if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { + UsedCands.push_back(BestCand); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + // Assign bundles for the compact region. + if (HasCompact) { + GlobalSplitCandidate &Cand = GlobalCand.front(); + assert(!Cand.PhysReg && "Compact region has no physreg"); + if (unsigned B = Cand.getBundles(BundleCand, 0)) { + UsedCands.push_back(0); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " + << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + splitAroundRegion(LREdit, UsedCands); return 0; } //===----------------------------------------------------------------------===// +// Per-Block Splitting +//===----------------------------------------------------------------------===// + +/// tryBlockSplit - Split a global live range around every block with uses. This +/// creates a lot of local live ranges, that will be split by tryLocalSplit if +/// they don't allocate. +unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); + unsigned Reg = VirtReg.reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit, SplitSpillMode); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + } + // No blocks were split. + if (LREdit.empty()) + return 0; + + // We did split for some blocks. + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + + // Tell LiveDebugVariables about the new ranges. + DebugVars->splitRegister(Reg, LREdit.regs()); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Sort out the new intervals created by splitting. The remainder interval + // goes straight to spilling, the new local ranges get to stay RS_New. 
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &LI = *LREdit.get(i); + if (getStage(LI) == RS_New && IntvMap[i] == 0) + setStage(LI, RS_Spill); + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + return 0; +} + +//===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// @@ -1045,8 +1272,10 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. - SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; - SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + SlotIndex StartIdx = + BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; + SlotIndex StopIdx = + BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; GapWeight.assign(NumGaps, 0.0f); @@ -1056,8 +1285,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, .checkInterference()) continue; - // We know that VirtReg is a continuous interval from FirstUse to LastUse, - // so we don't need InterferenceQuery. + // We know that VirtReg is a continuous interval from FirstInstr to + // LastInstr, so we don't need InterferenceQuery. // // Interference that overlaps an instruction is counted in both gaps // surrounding the instruction. The exception is interference before @@ -1097,8 +1326,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // while only covering a single block - A phi-def can use undef values from // predecessors, and the block could be a single-block loop. // We don't bother doing anything clever about such a case, we simply assume - // that the interval is continuous from FirstUse to LastUse. We should make - // sure that we don't do anything illegal to such an interval, though. + // that the interval is continuous from FirstInstr to LastInstr. We should + // make sure that we don't do anything illegal to such an interval, though. const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; if (Uses.size() <= 2) @@ -1120,17 +1349,17 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // // Instead we use these rules: // - // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the + // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the // noop split, of course). - // 2. Require progress be made for ranges with getStage() >= RS_Local. All + // 2. Require progress be made for ranges with getStage() == RS_Split2. All // the new ranges must have fewer instructions than before the split. - // 3. New ranges with the same number of instructions are marked RS_Local, + // 3. New ranges with the same number of instructions are marked RS_Split2, // smaller ranges are marked RS_New. // // These rules allow a 3 -> 2+3 split once, which we need. They also prevent // excessive splitting and infinite loops. // - bool ProgressRequired = getStage(VirtReg) >= RS_Local; + bool ProgressRequired = getStage(VirtReg) >= RS_Split2; // Best split candidate. unsigned BestBefore = NumGaps; @@ -1249,7 +1478,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); // If the new range has the same number of instructions as before, mark it as - // RS_Local so the next split will be forced to make progress. 
Otherwise, + // RS_Split2 so the next split will be forced to make progress. Otherwise, // leave the new intervals as RS_New so they can compete. bool LiveBefore = BestBefore != 0 || BI.LiveIn; bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; @@ -1259,7 +1488,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - setStage(*LREdit.get(i), RS_Local); + setStage(*LREdit.get(i), RS_Split2); DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); } DEBUG(dbgs() << '\n'); @@ -1278,6 +1507,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*>&NewVRegs) { + // Ranges must be Split2 or less. + if (getStage(VirtReg) >= RS_Spill) + return 0; + // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); @@ -1287,11 +1520,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); - // Don't iterate global splitting. - // Move straight to spilling if this range was produced by a global split. - if (getStage(VirtReg) >= RS_Global) - return 0; - SA->analyze(&VirtReg); // FIXME: SplitAnalysis may repair broken live ranges coming from the @@ -1305,24 +1533,17 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return PhysReg; } - // First try to split around a region spanning multiple blocks. - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) - return PhysReg; - - // Then isolate blocks with multiple uses. - SplitAnalysis::BlockPtrSet Blocks; - if (SA->getMultiUseBlocks(Blocks)) { - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - SE->splitSingleBlocks(Blocks); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); - if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); + // First try to split around a region spanning multiple blocks. RS_Split2 + // ranges already made dubious progress with region splitting, so they go + // straight to single block splitting. + if (getStage(VirtReg) < RS_Split2) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; } - // Don't assign any physregs. - return 0; + // Then isolate blocks. + return tryBlockSplit(VirtReg, Order, NewVRegs); } @@ -1342,9 +1563,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary - // queue. The RS_Second ranges already failed to do this, and they should not + // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. - if (Stage != RS_Second) + if (Stage != RS_Split) if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) return PhysReg; @@ -1353,8 +1574,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. 
// This gives a better picture of the interference to split around. - if (Stage == RS_First) { - setStage(VirtReg, RS_Second); + if (Stage < RS_Split) { + setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; @@ -1362,7 +1583,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. - if (Stage >= RS_Spill || !VirtReg.isSpillable()) + if (Stage >= RS_Done || !VirtReg.isSpillable()) return ~0u; // Try splitting VirtReg or interferences. @@ -1374,7 +1595,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); LiveRangeEdit LRE(VirtReg, NewVRegs, this); spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); if (VerifyEnabled) MF->verify(this, "After spilling"); @@ -1408,6 +1629,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); addMBBLiveIns(MF); @@ -1420,7 +1642,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { } // Write out new DBG_VALUE instructions. - DebugVars->emitDebugValues(VRM); + { + NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled); + DebugVars->emitDebugValues(VRM); + } // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp index 0dd3c598..ce3fb90 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp @@ -18,7 +18,6 @@ #include "VirtRegRewriter.h" #include "RegisterClassInfo.h" #include "Spiller.h" -#include "RegisterCoalescer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -209,7 +208,7 @@ namespace { AU.addRequiredID(StrongPHIEliminationID); // Make sure PassManager knows which analyses to make available // to coalescing and which analyses coalescing invalidates. 
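// The RAGreedy hunks above rename the live-range stages (RS_First/RS_Second/
// RS_Local/RS_Global give way to RS_Split, RS_Split2, RS_Spill and RS_Done) and
// gate eviction, splitting and spilling on ordered comparisons such as
// "getStage(VirtReg) >= RS_Split2". A minimal standalone sketch of the ordering
// those comparisons assume follows; the enumerator set and per-stage notes are
// inferred from this patch only and need not match the exact LLVM definition.

#include <cassert>

enum LiveRangeStage {
  RS_New,    // Never queued for splitting or spilling.
  RS_Split,  // Eviction failed once; wait for the second round before splitting.
  RS_Split2, // Produced by a split that made only dubious progress.
  RS_Spill,  // Splitting is exhausted; only spilling remains.
  RS_Done    // Produced by spilling; cannot be split or spilled again.
};

// Mirrors the gating used in trySplit() and selectOrSplit() above.
static bool mayEvict(LiveRangeStage S)       { return S != RS_Split; }
static bool mayRegionSplit(LiveRangeStage S) { return S < RS_Split2; }
static bool maySplitAtAll(LiveRangeStage S)  { return S < RS_Spill; }
static bool maySpill(LiveRangeStage S)       { return S < RS_Done; }

int main() {
  assert(mayEvict(RS_New) && !mayEvict(RS_Split));
  assert(mayRegionSplit(RS_New) && !mayRegionSplit(RS_Split2));
  assert(maySplitAtAll(RS_Split2) && !maySplitAtAll(RS_Spill));
  assert(maySpill(RS_Spill) && !maySpill(RS_Done));
  return 0;
}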
- AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequiredID(LiveStacksID); AU.addPreservedID(LiveStacksID); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 72230d4..0d2cf2d 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -450,7 +450,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); - au.addRequired<RegisterCoalescer>(); + au.addRequiredID(RegisterCoalescerPassID); if (customPassID) au.addRequiredID(*customPassID); au.addRequired<CalculateSpillWeights>(); diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 5a77e47..786d279 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -99,11 +99,16 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // CSR aliases go after the volatile registers, preserve the target's order. std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + // Check if RC is a proper sub-class. + if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) + if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) + RCI.ProperSubClass = true; + DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); - dbgs() << " ]\n"; + dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n"); }); // RCI is now up-to-date. diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h index d21fd67..2c14070 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h @@ -28,11 +28,12 @@ class RegisterClassInfo { struct RCInfo { unsigned Tag; unsigned NumRegs; + bool ProperSubClass; OwningArrayPtr<unsigned> Order; - RCInfo() : Tag(0), NumRegs(0) {} + RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {} operator ArrayRef<unsigned>() const { - return ArrayRef<unsigned>(Order.get(), NumRegs); + return makeArrayRef(Order.get(), NumRegs); } }; @@ -87,6 +88,16 @@ public: return get(RC); } + /// isProperSubClass - Returns true if RC has a legal super-class with more + /// allocatable registers. + /// + /// Register classes like GR32_NOSP are not proper sub-classes because %esp + /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb + /// mode because the GPR super-class is not legal. + bool isProperSubClass(const TargetRegisterClass *RC) const { + return get(RC).ProperSubClass; + } + /// getLastCalleeSavedAlias - Returns the last callee saved register that /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. 
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index b91f92c..9b414d6 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -15,8 +15,9 @@ #define DEBUG_TYPE "regcoalescing" #include "RegisterCoalescer.h" -#include "VirtRegMap.h" #include "LiveDebugVariables.h" +#include "RegisterClassInfo.h" +#include "VirtRegMap.h" #include "llvm/Pass.h" #include "llvm/Value.h" @@ -54,6 +55,7 @@ STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(NumInflated , "Number of register classes inflated"); static cl::opt<bool> EnableJoining("join-liveintervals", @@ -75,6 +77,128 @@ VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); +namespace { + class RegisterCoalescer : public MachineFunctionPass { + MachineFunction* MF; + MachineRegisterInfo* MRI; + const TargetMachine* TM; + const TargetRegisterInfo* TRI; + const TargetInstrInfo* TII; + LiveIntervals *LIS; + LiveDebugVariables *LDV; + const MachineLoopInfo* Loops; + AliasAnalysis *AA; + RegisterClassInfo RegClassInfo; + + /// JoinedCopies - Keep track of copies eliminated due to coalescing. + /// + SmallPtrSet<MachineInstr*, 32> JoinedCopies; + + /// ReMatCopies - Keep track of copies eliminated due to remat. + /// + SmallPtrSet<MachineInstr*, 32> ReMatCopies; + + /// ReMatDefs - Keep track of definition instructions which have + /// been remat'ed. + SmallPtrSet<MachineInstr*, 8> ReMatDefs; + + /// joinIntervals - join compatible live intervals + void joinIntervals(); + + /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// copies that cannot yet be coalesced into the "TryAgain" list. + void CopyCoalesceInMBB(MachineBasicBlock *MBB, + std::vector<MachineInstr*> &TryAgain); + + /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// which are the src/dst of the copy instruction CopyMI. This returns + /// true if the copy was successfully coalesced away. If it is not + /// currently possible to coalesce this interval, but it may be possible if + /// other things get coalesced, then it returns true by reference in + /// 'Again'. + bool JoinCopy(MachineInstr *TheCopy, bool &Again); + + /// JoinIntervals - Attempt to join these two intervals. On failure, this + /// returns false. The output "SrcInt" will not have been modified, so we + /// can use this information below to update aliases. + bool JoinIntervals(CoalescerPair &CP); + + /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// the source value number is defined by a copy from the destination reg + /// see if we can merge these two destination reg valno# into a single + /// value number, eliminating a copy. + bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); + + /// HasOtherReachingDefs - Return true if there are definitions of IntB + /// other than BValNo val# that can reach uses of AValno val# of IntA. + bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, + VNInfo *AValNo, VNInfo *BValNo); + + /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. 
+ /// If the source value number is defined by a commutable instruction and + /// its other operand is coalesced to the copy dest register, see if we + /// can transform the copy into a noop by commuting the definition. + bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); + + /// ReMaterializeTrivialDef - If the source of a copy is defined by a + /// trivial computation, replace the copy by rematerialize the definition. + /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. + bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, + unsigned DstReg, MachineInstr *CopyMI); + + /// shouldJoinPhys - Return true if a physreg copy should be joined. + bool shouldJoinPhys(CoalescerPair &CP); + + /// isWinToJoinCrossClass - Return true if it's profitable to coalesce + /// two virtual registers from different register classes. + bool isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC); + + /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and + /// update the subregister number if it is not zero. If DstReg is a + /// physical register and the existing subregister number of the def / use + /// being updated is not zero, make sure to set it to the correct physical + /// subregister. + void UpdateRegDefsUses(const CoalescerPair &CP); + + /// RemoveDeadDef - If a def of a live interval is now determined dead, + /// remove the val# it defines. If the live interval becomes empty, remove + /// it as well. + bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); + + /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the + /// VNInfo copy flag for DstReg and all aliases. + void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); + + /// markAsJoined - Remember that CopyMI has already been joined. + void markAsJoined(MachineInstr *CopyMI); + + /// eliminateUndefCopy - Handle copies of undef values. + bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); + + public: + static char ID; // Class identification, replacement for typeinfo + RegisterCoalescer() : MachineFunctionPass(ID) { + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + /// runOnMachineFunction - pass entry point + virtual bool runOnMachineFunction(MachineFunction&); + + /// print - Implement the dump method. + virtual void print(raw_ostream &O, const Module* = 0) const; + }; +} /// end anonymous namespace + +char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID; + INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) @@ -116,14 +240,14 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, } bool CoalescerPair::setRegisters(const MachineInstr *MI) { - srcReg_ = dstReg_ = subIdx_ = 0; - newRC_ = 0; - flipped_ = crossClass_ = false; + SrcReg = DstReg = SubIdx = 0; + NewRC = 0; + Flipped = CrossClass = false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) return false; - partial_ = SrcSub || DstSub; + Partial = SrcSub || DstSub; // If one register is a physreg, it must be Dst. 
if (TargetRegisterInfo::isPhysicalRegister(Src)) { @@ -131,7 +255,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { return false; std::swap(Src, Dst); std::swap(SrcSub, DstSub); - flipped_ = true; + Flipped = true; } const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -139,14 +263,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { if (TargetRegisterInfo::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg. if (DstSub) { - Dst = tri_.getSubReg(Dst, DstSub); + Dst = TRI.getSubReg(Dst, DstSub); if (!Dst) return false; DstSub = 0; } // Eliminate SrcSub by picking a corresponding Dst superregister. if (SrcSub) { - Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); if (!Dst) return false; SrcSub = 0; } else if (!MRI.getRegClass(Src)->contains(Dst)) { @@ -164,7 +288,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { return false; const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); - if (!getCommonSubClass(DstRC, SrcRC)) + if (!TRI.getCommonSubClass(DstRC, SrcRC)) return false; SrcSub = DstSub = 0; } @@ -174,36 +298,36 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { std::swap(Src, Dst); DstSub = SrcSub; SrcSub = 0; - assert(!flipped_ && "Unexpected flip"); - flipped_ = true; + assert(!Flipped && "Unexpected flip"); + Flipped = true; } // Find the new register class. const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); if (DstSub) - newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); else - newRC_ = getCommonSubClass(DstRC, SrcRC); - if (!newRC_) + NewRC = TRI.getCommonSubClass(DstRC, SrcRC); + if (!NewRC) return false; - crossClass_ = newRC_ != DstRC || newRC_ != SrcRC; + CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && "Cannot have a physical SubIdx"); - srcReg_ = Src; - dstReg_ = Dst; - subIdx_ = DstSub; + SrcReg = Src; + DstReg = Dst; + SubIdx = DstSub; return true; } bool CoalescerPair::flip() { - if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg)) return false; - std::swap(srcReg_, dstReg_); - flipped_ = !flipped_; + std::swap(SrcReg, DstReg); + Flipped = !Flipped; return true; } @@ -211,36 +335,36 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!MI) return false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) return false; - // Find the virtual register that is srcReg_. - if (Dst == srcReg_) { + // Find the virtual register that is SrcReg. + if (Dst == SrcReg) { std::swap(Src, Dst); std::swap(SrcSub, DstSub); - } else if (Src != srcReg_) { + } else if (Src != SrcReg) { return false; } - // Now check that Dst matches dstReg_. - if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + // Now check that Dst matches DstReg. 
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { if (!TargetRegisterInfo::isPhysicalRegister(Dst)) return false; - assert(!subIdx_ && "Inconsistent CoalescerPair state."); + assert(!SubIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. if (DstSub) - Dst = tri_.getSubReg(Dst, DstSub); + Dst = TRI.getSubReg(Dst, DstSub); // Full copy of Src. if (!SrcSub) - return dstReg_ == Dst; + return DstReg == Dst; // This is a partial register copy. Check that the parts match. - return tri_.getSubReg(dstReg_, SrcSub) == Dst; + return TRI.getSubReg(DstReg, SrcSub) == Dst; } else { - // dstReg_ is virtual. - if (dstReg_ != Dst) + // DstReg is virtual. + if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(tri_, subIdx_, SrcSub) == DstSub; + return compose(TRI, SubIdx, SrcSub) == DstSub; } } @@ -292,14 +416,14 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { // Bail if there is no dst interval - can happen when merging physical subreg // operations. - if (!li_->hasInterval(CP.getDstReg())) + if (!LIS->hasInterval(CP.getDstReg())) return false; LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -355,7 +479,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Make sure that the end of the live range is inside the same block as // CopyMI. MachineInstr *ValLREndInst = - li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) return false; @@ -368,11 +492,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // of its aliases is overlapping the live interval of the virtual register. // If so, do not coalesce. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { + for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { DEBUG({ dbgs() << "\t\tInterfere with alias "; - li_->getInterval(*AS).print(dbgs(), tri_); + LIS->getInterval(*AS).print(dbgs(), TRI); }); return false; } @@ -380,7 +504,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, DEBUG({ dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); + IntB.print(dbgs(), TRI); }); SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; @@ -398,13 +522,13 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // If the IntB live range is assigned to a physical register, and if that // physreg has sub-registers, update their live intervals as well. 
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) + for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!LIS->hasInterval(*SR)) continue; - LiveInterval &SRLI = li_->getInterval(*SR); + LiveInterval &SRLI = LIS->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, SRLI.getNextValue(FillerStart, 0, - li_->getVNInfoAllocator()))); + LIS->getVNInfoAllocator()))); } } @@ -419,7 +543,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, } DEBUG({ dbgs() << " result = "; - IntB.print(dbgs(), tri_); + IntB.print(dbgs(), TRI); dbgs() << "\n"; }); @@ -434,7 +558,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // merge, find the last use and trim the live range. That will also add the // isKill marker. if (ALR->end == CopyIdx) - li_->shrinkToUses(&IntA); + LIS->shrinkToUses(&IntA); ++numExtends; return true; @@ -498,15 +622,15 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; // Bail if there is no dst interval. - if (!li_->hasInterval(CP.getDstReg())) + if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -524,7 +648,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // the optimization. if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) return false; const MCInstrDesc &MCID = DefMI->getDesc(); @@ -538,7 +662,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) return false; unsigned Op1, Op2, NewDstIdx; - if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) return false; if (Op1 == UseOpIdx) NewDstIdx = Op2; @@ -560,18 +684,18 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && - HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) + for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + if (LIS->hasInterval(*AS) && + HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. 
- for (MachineRegisterInfo::use_nodbg_iterator UI = - mri_->use_nodbg_begin(IntA.reg), - UE = mri_->use_nodbg_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_nodbg_iterator UI = + MRI->use_nodbg_begin(IntA.reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - SlotIndex UseIdx = li_->getInstructionIndex(UseMI); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end()) continue; @@ -585,15 +709,15 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + MachineInstr *NewMI = TII->commuteInstruction(DefMI); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && TargetRegisterInfo::isVirtualRegister(IntB.reg) && - !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) return false; if (NewMI != DefMI) { - li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); MBB->insert(DefMI, NewMI); MBB->erase(DefMI); } @@ -610,8 +734,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // = B // Update uses of IntA of the specific Val# with IntB. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE;) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), + UE = MRI->use_end(); UI != UE;) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; @@ -623,12 +747,12 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - UseMO.substPhysReg(NewReg, *tri_); + UseMO.substPhysReg(NewReg, *TRI); else UseMO.setReg(NewReg); if (UseMI == CopyMI) @@ -674,27 +798,24 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, - unsigned DstSubIdx, MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. 
- if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) + if (ValNo->isPHIDef() || ValNo->isUnused()) return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); if (!DefMI) return false; assert(DefMI && "Defining instruction disappeared"); const MCInstrDesc &MCID = DefMI->getDesc(); if (!MCID.isAsCheapAsAMove()) return false; - if (!tii_->isTriviallyReMaterializable(DefMI, AA)) + if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, AA, SawStore)) + if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; if (MCID.getNumDefs() != 1) return false; @@ -702,36 +823,20 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_); + const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (mri_->getRegClass(DstReg) != RC) + if (MRI->getRegClass(DstReg) != RC) return false; } else if (!RC->contains(DstReg)) return false; } - // If destination register has a sub-register index on it, make sure it - // matches the instruction register class. - if (DstSubIdx) { - const MCInstrDesc &MCID = DefMI->getDesc(); - if (MCID.getNumDefs() != 1) - return false; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = - DstRC->getSubRegisterRegClass(DstSubIdx); - const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_); - if (DefRC == DstRC) - DstSubIdx = 0; - else if (DefRC != DstSubRC) - return false; - } - RemoveCopyFlag(DstReg, CopyMI); MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); + TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); // CopyMI may have implicit operands, transfer them over to the newly @@ -746,7 +851,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, } NewMI->copyImplicitOps(CopyMI); - li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); @@ -755,8 +860,51 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, // The source interval can become smaller because we removed a use. if (preserveSrcInt) - li_->shrinkToUses(&SrcInt); + LIS->shrinkToUses(&SrcInt); + + return true; +} + +/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef> +/// values, it only removes local variables. When we have a copy like: +/// +/// %vreg1 = COPY %vreg2<undef> +/// +/// We delete the copy and remove the corresponding value number from %vreg1. +/// Any uses of that value number are marked as <undef>. 
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, + const CoalescerPair &CP) { + SlotIndex Idx = LIS->getInstructionIndex(CopyMI); + LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg()); + if (SrcInt->liveAt(Idx)) + return false; + LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg()); + if (DstInt->liveAt(Idx)) + return false; + // No intervals are live-in to CopyMI - it is undef. + if (CP.isFlipped()) + DstInt = SrcInt; + SrcInt = 0; + + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + assert(DeadVNI && "No value defined in DstInt"); + DstInt->removeValNo(DeadVNI); + + // Find new undef uses. + for (MachineRegisterInfo::reg_nodbg_iterator + I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); + I != E; ++I) { + MachineOperand &MO = I.getOperand(); + if (MO.isDef() || MO.isUndef()) + continue; + MachineInstr *MI = MO.getParent(); + SlotIndex Idx = LIS->getInstructionIndex(MI); + if (DstInt->liveAt(Idx)) + continue; + MO.setIsUndef(true); + DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); + } return true; } @@ -773,22 +921,20 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { unsigned SubIdx = CP.getSubIdx(); // Update LiveDebugVariables. - ldv_->renameRegister(SrcReg, DstReg, SubIdx); + LDV->renameRegister(SrcReg, DstReg, SubIdx); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { // A PhysReg copy that won't be coalesced can perhaps be rematerialized // instead. if (DstIsPhys) { - if (UseMI->isCopy() && - !UseMI->getOperand(1).getSubReg() && - !UseMI->getOperand(0).getSubReg() && + if (UseMI->isFullCopy() && UseMI->getOperand(1).getReg() == SrcReg && UseMI->getOperand(0).getReg() != SrcReg && UseMI->getOperand(0).getReg() != DstReg && !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), 0, UseMI)) + ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false, + UseMI->getOperand(0).getReg(), UseMI)) continue; } @@ -803,10 +949,18 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { Kills |= MO.isKill(); Deads |= MO.isDead(); + // Make sure we don't create read-modify-write defs accidentally. We + // assume here that a SrcReg def cannot be joined into a live DstReg. If + // RegisterCoalescer starts tracking partially live registers, we will + // need to check the actual LiveInterval to determine if DstReg is live + // here. + if (SubIdx && !Reads) + MO.setIsUndef(); + if (DstIsPhys) - MO.substPhysReg(DstReg, *tri_); + MO.substPhysReg(DstReg, *TRI); else - MO.substVirtReg(DstReg, SubIdx, *tri_); + MO.substVirtReg(DstReg, SubIdx, *TRI); } // This instruction is a copy that will be removed. @@ -817,19 +971,19 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { // If UseMI was a simple SrcReg def, make sure we didn't turn it into a // read-modify-write of DstReg. if (Deads) - UseMI->addRegisterDead(DstReg, tri_); + UseMI->addRegisterDead(DstReg, TRI); else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, tri_); + UseMI->addRegisterDefined(DstReg, TRI); // Kill flags apply to the whole physical register. 
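// The new RegisterCoalescer::eliminateUndefCopy() above deletes a COPY when
// neither the source nor the destination interval carries a live value at the
// copy's slot, then removes the dead value number from the destination and
// marks any remaining reads as <undef>. The sketch below models only the
// liveness early-outs, using plain half-open index ranges in place of LLVM's
// LiveInterval/SlotIndex machinery; the numbers are made up for illustration.

#include <cassert>
#include <utility>
#include <vector>

// An interval is a set of half-open [start, end) segments over instruction
// indices, standing in for LiveInterval.
typedef std::vector<std::pair<unsigned, unsigned> > Interval;

static bool liveAt(const Interval &LI, unsigned Idx) {
  for (unsigned i = 0, e = LI.size(); i != e; ++i)
    if (LI[i].first <= Idx && Idx < LI[i].second)
      return true;
  return false;
}

// "%dst = COPY %src" at Idx copies an <undef> value when no interval is
// live-in to the copy; this mirrors the pair of early returns above.
static bool isUndefCopy(const Interval &Src, const Interval &Dst, unsigned Idx) {
  return !liveAt(Src, Idx) && !liveAt(Dst, Idx);
}

int main() {
  Interval Src(1, std::make_pair(10u, 20u)); // %src live in [10, 20)
  Interval Dst(1, std::make_pair(30u, 40u)); // %dst live in [30, 40)
  assert(isUndefCopy(Src, Dst, 25));  // nothing live here: copy of <undef>
  assert(!isUndefCopy(Src, Dst, 15)); // %src is live: a real copy
  return 0;
}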
if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, tri_); + UseMI->addRegisterKilled(DstReg, TRI); } DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; dbgs() << *UseMI; }); } @@ -838,18 +992,18 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { /// removeIntervalIfEmpty - Check if the live interval of a physical register /// is empty, if so remove it and also remove the empty intervals of its /// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, - const TargetRegisterInfo *tri_) { +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, + const TargetRegisterInfo *TRI) { if (li.empty()) { if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) + for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { + if (!LIS->hasInterval(*SR)) continue; - LiveInterval &sli = li_->getInterval(*SR); + LiveInterval &sli = LIS->getInterval(*SR); if (sli.empty()) - li_->removeInterval(*SR); + LIS->removeInterval(*SR); } - li_->removeInterval(li.reg); + LIS->removeInterval(li.reg); return true; } return false; @@ -859,29 +1013,29 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, /// the val# it defines. If the live interval becomes empty, remove it as well. bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, li_, tri_); + return removeIntervalIfEmpty(li, LIS, TRI); } void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI) { - SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - if (li_->hasInterval(DstReg)) { - LiveInterval &LI = li_->getInterval(DstReg); + SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + if (LIS->hasInterval(DstReg)) { + LiveInterval &LI = LIS->getInterval(DstReg); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) if (LR->valno->def == DefIdx) LR->valno->setCopy(0); } if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) return; - for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) + for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) { + if (!LIS->hasInterval(*AS)) continue; - LiveInterval &LI = li_->getInterval(*AS); + LiveInterval &LI = LIS->getInterval(*AS); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) if (LR->valno->def == DefIdx) LR->valno->setCopy(0); @@ -894,8 +1048,8 @@ void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, /// are not spillable! If the destination interval uses are far away, think /// twice about coalescing them! bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = li_->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + bool Allocatable = LIS->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); /// Always join simple intervals that are defined by a single copy from a /// reserved register. 
This doesn't increase register pressure, so it is @@ -918,8 +1072,8 @@ bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { // Don't join with physregs that have a ridiculous number of live // ranges. The data structure performance is really bad when that // happens. - if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + if (LIS->hasInterval(CP.getDstReg()) && + LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) { ++numAborts; DEBUG(dbgs() << "\tPhysical register live interval too complicated, abort!\n"); @@ -929,9 +1083,9 @@ bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { // FIXME: Why are we skipping this test for partial copies? // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. if (!CP.isPartial()) { - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg()); unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + unsigned Length = LIS->getApproximateInstructionCount(JoinVInt); if (Length > Threshold) { ++numAborts; DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); @@ -957,12 +1111,12 @@ RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, // Early exit if the function is fairly small, coalesce aggressively if // that's the case. For really special register classes with 3 or // fewer registers, be a bit more careful. - (li_->getFuncInstructionCount() / NewRCCount) < 8) + (LIS->getFuncInstructionCount() / NewRCCount) < 8) return true; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); - unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + LiveInterval &SrcInt = LIS->getInterval(SrcReg); + LiveInterval &DstInt = LIS->getInterval(DstReg); + unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt); + unsigned DstSize = LIS->getApproximateInstructionCount(DstInt); // Coalesce aggressively if the intervals are small compared to the number of // registers in the new class. The number 4 is fairly arbitrary, chosen to be @@ -972,10 +1126,10 @@ RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, return true; // Estimate *register use density*. If it doubles or more, abort. - unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), - mri_->use_nodbg_end()); - unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), - mri_->use_nodbg_end()); + unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg), + MRI->use_nodbg_end()); + unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg), + MRI->use_nodbg_end()); unsigned NewUses = SrcUses + DstUses; unsigned NewSize = SrcSize + DstSize; if (SrcRC != NewRC && SrcSize > ThresSize) { @@ -1003,9 +1157,9 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - CoalescerPair CP(*tii_, *tri_); + CoalescerPair CP(*TII, *TRI); if (!CP.setRegisters(CopyMI)) { DEBUG(dbgs() << "\tNot coalescable.\n"); return false; @@ -1018,8 +1172,15 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { return false; // Not coalescable. 
} - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) - << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + // Eliminate undefs. + if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + return false; // Not coalescable. + } + + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) + << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx()) << "\n"); // Enforce policies. @@ -1028,8 +1189,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) + ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), CopyMI)) return true; return false; } @@ -1042,8 +1203,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { return false; } if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - mri_->getRegClass(CP.getSrcReg()), - mri_->getRegClass(CP.getDstReg()), + MRI->getRegClass(CP.getSrcReg()), + MRI->getRegClass(CP.getDstReg()), CP.getNewRC())) { DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); Again = true; // May be possible to coalesce later. @@ -1052,8 +1213,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { } // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > - li_->getInterval(CP.getDstReg()).ranges.size()) + if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() > + LIS->getInterval(CP.getDstReg()).ranges.size()) CP.flip(); } @@ -1067,8 +1228,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) + ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. @@ -1091,7 +1252,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // other. Make sure the resulting register is set to the right register class. if (CP.isCrossClass()) { ++numCrossRCs; - mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); } // Remember to delete the copy instruction. @@ -1105,10 +1266,10 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { SmallVector<MachineBasicBlock*, 16> BlockSeq; // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the // ranges for this, and they are preserved. 
- LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg()); for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); I != E; ++I ) { - li_->findLiveInMBBs(I->start, I->end, BlockSeq); + LIS->findLiveInMBBs(I->start, I->end, BlockSeq); for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { MachineBasicBlock &block = *BlockSeq[idx]; if (!block.isLiveIn(CP.getDstReg())) @@ -1120,15 +1281,15 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // SrcReg is guarateed to be the register whose live interval that is // being merged. - li_->removeInterval(CP.getSrcReg()); + LIS->removeInterval(CP.getSrcReg()); // Update regalloc hint. - tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); + TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + LiveInterval &DstInt = LIS->getInterval(CP.getDstReg()); dbgs() << "\tJoined. Result = "; - DstInt.print(dbgs(), tri_); + DstInt.print(dbgs(), TRI); dbgs() << "\n"; }); @@ -1197,6 +1358,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // which allows us to coalesce A and B. // VNI is the definition of B. LR is the life range of A that includes // the slot just before B. If we return true, we add "B = X" to DupCopies. +// This implies that A dominates B. static bool RegistersDefinedFromSameValue(LiveIntervals &li, const TargetRegisterInfo &tri, CoalescerPair &CP, @@ -1248,7 +1410,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, // If the copies use two different value numbers of X, we cannot merge // A and B. LiveInterval &SrcInt = li.getInterval(Src); - if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def)) + // getVNInfoBefore returns NULL for undef copies. In this case, the + // optimization is still safe. + if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def)) return false; DupCopies.push_back(MI); @@ -1259,18 +1423,18 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, /// JoinIntervals - Attempt to join these two intervals. On failure, this /// returns false. bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { - LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); - DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; }); // If a live interval is a physical register, check for interference with any // aliases. The interference check implemented here is a bit more conservative // than the full interfeence check below. We allow overlapping live ranges // only when one is a copy of the other. if (CP.isPhys()) { - for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ - if (!li_->hasInterval(*AS)) + for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ + if (!LIS->hasInterval(*AS)) continue; - const LiveInterval &LHS = li_->getInterval(*AS); + const LiveInterval &LHS = LIS->getInterval(*AS); LiveInterval::const_iterator LI = LHS.begin(); for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); RI != RE; ++RI) { @@ -1278,10 +1442,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // Does LHS have an overlapping live range starting before RI? 
if ((LI != LHS.begin() && LI[-1].end > RI->start) && (RI->start != RI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) { DEBUG({ dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); + LHS.print(dbgs(), TRI); dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; }); return false; @@ -1290,10 +1454,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // Check that LHS ranges beginning in this range are copies. for (; LI != LHS.end() && LI->start < RI->end; ++LI) { if (LI->start != LI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) { DEBUG({ dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); + LHS.print(dbgs(), TRI); dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; }); return false; @@ -1313,8 +1477,8 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { SmallVector<MachineInstr*, 8> DupCopies; - LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); - DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); + LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; }); // Loop over the value numbers of the LHS, seeing if any are defined from // the RHS. @@ -1337,7 +1501,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // from the RHS interval, we can use its value #. MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; LHSValsDefinedFromRHS[VNI] = lr->valno; @@ -1364,7 +1528,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // from the LHS interval, we can use its value #. MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; RHSValsDefinedFromLHS[VNI] = lr->valno; @@ -1486,7 +1650,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // and mark the X as coalesced to keep the illusion. unsigned Src = MI->getOperand(1).getReg(); SourceRegisters.push_back(Src); - MI->getOperand(0).substVirtReg(Src, 0, *tri_); + MI->getOperand(0).substVirtReg(Src, 0, *TRI); markAsJoined(MI); } @@ -1495,13 +1659,13 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // that B = X is gone. for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), E = SourceRegisters.end(); I != E; ++I) { - li_->shrinkToUses(&li_->getInterval(*I)); + LIS->shrinkToUses(&LIS->getInterval(*I)); } // If we get here, we know that we can coalesce the live ranges. Ask the // intervals to coalesce themselves now. 
LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, - mri_); + MRI); return true; } @@ -1552,7 +1716,7 @@ void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty()) ImpDefCopies.push_back(Inst); else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(Inst); @@ -1590,9 +1754,9 @@ void RegisterCoalescer::joinIntervals() { DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); std::vector<MachineInstr*> TryAgainList; - if (loopInfo->empty()) { + if (Loops->empty()) { // If there are no loops in the function, join intervals in function order. - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) CopyCoalesceInMBB(I, TryAgainList); } else { @@ -1603,9 +1767,9 @@ void RegisterCoalescer::joinIntervals() { // Join intervals in the function prolog first. We want to join physical // registers with virtual registers before the intervals got too long. std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ MachineBasicBlock *MBB = I; - MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); } // Sort by loop depth. @@ -1644,22 +1808,22 @@ void RegisterCoalescer::releaseMemory() { } bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - li_ = &getAnalysis<LiveIntervals>(); - ldv_ = &getAnalysis<LiveDebugVariables>(); + MF = &fn; + MRI = &fn.getRegInfo(); + TM = &fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + LIS = &getAnalysis<LiveIntervals>(); + LDV = &getAnalysis<LiveDebugVariables>(); AA = &getAnalysis<AliasAnalysis>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + Loops = &getAnalysis<MachineLoopInfo>(); DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'); + << ((Value*)MF->getFunction())->getName() << '\n'); if (VerifyCoalescing) - mf_->verify(this, "Before register coalescing"); + MF->verify(this, "Before register coalescing"); RegClassInfo.runOnMachineFunction(fn); @@ -1668,9 +1832,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { joinIntervals(); DEBUG({ dbgs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I){ - I->second->print(dbgs(), tri_); + I->second->print(dbgs(), TRI); dbgs() << "\n"; } }); @@ -1678,8 +1842,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Perform a final pass over the instructions and compute spill weights // and remove identity moves. 
- SmallVector<unsigned, 4> DeadDefs; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + SmallVector<unsigned, 4> DeadDefs, InflateRegs; + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); @@ -1690,6 +1854,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { bool DoDelete = true; assert(MI->isCopyLike() && "Unrecognized copy instruction"); unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + // Collect candidates for register class inflation. + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) + InflateRegs.push_back(SrcReg); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) + InflateRegs.push_back(DstReg); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && MI->getNumOperands() > 2) // Do not delete extract_subreg, insert_subreg of physical @@ -1701,8 +1875,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { if (MI->allDefsAreDead()) { if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - li_->hasInterval(SrcReg)) - li_->shrinkToUses(&li_->getInterval(SrcReg)); + LIS->hasInterval(SrcReg)) + LIS->shrinkToUses(&LIS->getInterval(SrcReg)); DoDelete = true; } if (!DoDelete) { @@ -1711,10 +1885,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MI->RemoveOperand(3); MI->RemoveOperand(1); } - MI->setDesc(tii_->get(TargetOpcode::KILL)); + MI->setDesc(TII->get(TargetOpcode::KILL)); mii = llvm::next(mii); } else { - li_->RemoveMachineInstrFromMaps(MI); + LIS->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; } @@ -1731,12 +1905,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) { DeadDefs.push_back(Reg); + // Remat may also enable register class inflation. + if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) + InflateRegs.push_back(Reg); + } if (MO.isDead()) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !mri_->use_nodbg_empty(Reg)) { + !MRI->use_nodbg_empty(Reg)) { isDead = false; break; } @@ -1745,9 +1923,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { while (!DeadDefs.empty()) { unsigned DeadDef = DeadDefs.back(); DeadDefs.pop_back(); - RemoveDeadDef(li_->getInterval(DeadDef), MI); + RemoveDeadDef(LIS->getInterval(DeadDef), MI); } - li_->RemoveMachineInstrFromMaps(mii); + LIS->RemoveMachineInstrFromMaps(mii); mii = mbbi->erase(mii); continue; } else @@ -1757,14 +1935,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { ++mii; // Check for now unnecessary kill flags. 
- if (li_->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); + if (LIS->isNotInMIMap(MI)) continue; + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); - if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) { + if (!reg || !LIS->hasInterval(reg)) continue; + if (!LIS->getInterval(reg).killedAt(DefIdx)) { MO.setIsKill(false); continue; } @@ -1772,26 +1950,40 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remain alive. if (!TargetRegisterInfo::isPhysicalRegister(reg)) continue; - for (const unsigned *SR = tri_->getSubRegisters(reg); + for (const unsigned *SR = TRI->getSubRegisters(reg); unsigned S = *SR; ++SR) - if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, tri_); + if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, TRI); } } } + // After deleting a lot of copies, register classes may be less constrained. + // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 -> + // DPR inflation. + array_pod_sort(InflateRegs.begin(), InflateRegs.end()); + InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), + InflateRegs.end()); + DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); + for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { + unsigned Reg = InflateRegs[i]; + if (MRI->reg_nodbg_empty(Reg)) + continue; + if (MRI->recomputeRegClass(Reg, *TM)) { + DEBUG(dbgs() << PrintReg(Reg) << " inflated to " + << MRI->getRegClass(Reg)->getName() << '\n'); + ++NumInflated; + } + } + DEBUG(dump()); - DEBUG(ldv_->dump()); + DEBUG(LDV->dump()); if (VerifyCoalescing) - mf_->verify(this, "After register coalescing"); + MF->verify(this, "After register coalescing"); return true; } /// print - Implement the dump method. void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { - li_->print(O, m); -} - -RegisterCoalescer *llvm::createRegisterCoalescer() { - return new RegisterCoalescer(); + LIS->print(O, m); } diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h index 4131d91..472c483 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -12,198 +12,60 @@ // //===----------------------------------------------------------------------===// -#include "RegisterClassInfo.h" -#include "llvm/Support/IncludeFile.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/ADT/SmallPtrSet.h" - #ifndef LLVM_CODEGEN_REGISTER_COALESCER_H #define LLVM_CODEGEN_REGISTER_COALESCER_H namespace llvm { - class MachineFunction; - class RegallocQuery; - class AnalysisUsage; class MachineInstr; class TargetRegisterInfo; class TargetRegisterClass; class TargetInstrInfo; - class LiveDebugVariables; - class VirtRegMap; - class MachineLoopInfo; - - class CoalescerPair; - - /// An abstract interface for register coalescers. Coalescers must - /// implement this interface to be part of the coalescer analysis - /// group. 
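// After the copies are gone, runOnMachineFunction() above records every virtual
// register left in a proper sub-class (RegisterClassInfo::isProperSubClass) in
// InflateRegs, removes duplicates, and asks MRI->recomputeRegClass() to widen
// each survivor, counting successes in NumInflated. A small sketch of that
// collect / sort / unique / recompute pattern, with the register-class query
// replaced by a made-up stub so the snippet stands alone:

#include <algorithm>
#include <cassert>
#include <vector>

// Stand-in for MachineRegisterInfo::recomputeRegClass(); pretend only
// even-numbered virtual registers can be inflated to a larger class.
static bool recomputeRegClass(unsigned Reg) { return (Reg & 1) == 0; }

static unsigned inflateRegs(std::vector<unsigned> &InflateRegs) {
  // Candidates are pushed whenever a coalesced copy or a rematerialized def
  // leaves a register in a proper sub-class, so the same register may appear
  // several times; sort and unique before doing any work.
  std::sort(InflateRegs.begin(), InflateRegs.end());
  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
                    InflateRegs.end());
  unsigned NumInflated = 0;
  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i)
    if (recomputeRegClass(InflateRegs[i]))
      ++NumInflated;
  return NumInflated;
}

int main() {
  std::vector<unsigned> Regs;
  Regs.push_back(4); Regs.push_back(7); Regs.push_back(4); // vreg4 seen twice
  assert(inflateRegs(Regs) == 1); // vreg4 inflates once; vreg7 stays put
  return 0;
}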
- class RegisterCoalescer : public MachineFunctionPass { - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - LiveIntervals *li_; - LiveDebugVariables *ldv_; - const MachineLoopInfo* loopInfo; - AliasAnalysis *AA; - RegisterClassInfo RegClassInfo; - - /// JoinedCopies - Keep track of copies eliminated due to coalescing. - /// - SmallPtrSet<MachineInstr*, 32> JoinedCopies; - - /// ReMatCopies - Keep track of copies eliminated due to remat. - /// - SmallPtrSet<MachineInstr*, 32> ReMatCopies; - - /// ReMatDefs - Keep track of definition instructions which have - /// been remat'ed. - SmallPtrSet<MachineInstr*, 8> ReMatDefs; - - /// joinIntervals - join compatible live intervals - void joinIntervals(); - - /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting - /// copies that cannot yet be coalesced into the "TryAgain" list. - void CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector<MachineInstr*> &TryAgain); - - /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, - /// which are the src/dst of the copy instruction CopyMI. This returns true - /// if the copy was successfully coalesced away. If it is not currently - /// possible to coalesce this interval, but it may be possible if other - /// things get coalesced, then it returns true by reference in 'Again'. - bool JoinCopy(MachineInstr *TheCopy, bool &Again); - - /// JoinIntervals - Attempt to join these two intervals. On failure, this - /// returns false. The output "SrcInt" will not have been modified, so we can - /// use this information below to update aliases. - bool JoinIntervals(CoalescerPair &CP); - - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If - /// the source value number is defined by a copy from the destination reg - /// see if we can merge these two destination reg valno# into a single - /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); - - /// HasOtherReachingDefs - Return true if there are definitions of IntB - /// other than BValNo val# that can reach uses of AValno val# of IntA. - bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, - VNInfo *AValNo, VNInfo *BValNo); - - /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. - /// If the source value number is defined by a commutable instruction and - /// its other operand is coalesced to the copy dest register, see if we - /// can transform the copy into a noop by commuting the definition. - bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - - /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial - /// computation, replace the copy by rematerialize the definition. - /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. - bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, - unsigned DstReg, unsigned DstSubIdx, - MachineInstr *CopyMI); - - /// shouldJoinPhys - Return true if a physreg copy should be joined. - bool shouldJoinPhys(CoalescerPair &CP); - - /// isWinToJoinCrossClass - Return true if it's profitable to coalesce - /// two virtual registers from different register classes. 
- bool isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC); - - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and - /// update the subregister number if it is not zero. If DstReg is a - /// physical register and the existing subregister number of the def / use - /// being updated is not zero, make sure to set it to the correct physical - /// subregister. - void UpdateRegDefsUses(const CoalescerPair &CP); - - /// RemoveDeadDef - If a def of a live interval is now determined dead, - /// remove the val# it defines. If the live interval becomes empty, remove - /// it as well. - bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - - /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the - /// VNInfo copy flag for DstReg and all aliases. - void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - - /// markAsJoined - Remember that CopyMI has already been joined. - void markAsJoined(MachineInstr *CopyMI); - - public: - static char ID; // Class identification, replacement for typeinfo - RegisterCoalescer() : MachineFunctionPass(ID) { - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - } - - /// Register allocators must call this from their own - /// getAnalysisUsage to cover the case where the coalescer is not - /// a Pass in the proper sense and isn't managed by PassManager. - /// PassManager needs to know which analyses to make available and - /// which to invalidate when running the register allocator or any - /// pass that might call coalescing. The long-term solution is to - /// allow hierarchies of PassManagers. - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - virtual void releaseMemory(); - - /// runOnMachineFunction - pass entry point - virtual bool runOnMachineFunction(MachineFunction&); - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* = 0) const; - }; /// CoalescerPair - A helper class for register coalescers. When deciding if /// two registers can be coalesced, CoalescerPair can determine if a copy /// instruction would become an identity copy after coalescing. class CoalescerPair { - const TargetInstrInfo &tii_; - const TargetRegisterInfo &tri_; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; - /// dstReg_ - The register that will be left after coalescing. It can be a + /// DstReg - The register that will be left after coalescing. It can be a /// virtual or physical register. - unsigned dstReg_; + unsigned DstReg; - /// srcReg_ - the virtual register that will be coalesced into dstReg. - unsigned srcReg_; + /// SrcReg - the virtual register that will be coalesced into dstReg. + unsigned SrcReg; - /// subReg_ - The subregister index of srcReg in dstReg_. It is possible the - /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a + /// subReg_ - The subregister index of srcReg in DstReg. It is possible the + /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a /// virtual register. - unsigned subIdx_; + unsigned SubIdx; - /// partial_ - True when the original copy was a partial subregister copy. - bool partial_; + /// Partial - True when the original copy was a partial subregister copy. + bool Partial; - /// crossClass_ - True when both regs are virtual, and newRC is constrained. - bool crossClass_; + /// CrossClass - True when both regs are virtual, and newRC is constrained. 
+ bool CrossClass;
- /// flipped_ - True when DstReg and SrcReg are reversed from the oriignal copy
- /// instruction.
- bool flipped_;
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
- /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// NewRC - The register class of the coalesced register, or NULL if DstReg
/// is a physreg.
- const TargetRegisterClass *newRC_;
+ const TargetRegisterClass *NewRC;
public:
CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
- : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
- partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+ : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
/// setRegisters - set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
bool setRegisters(const MachineInstr*);
- /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
- /// because dstReg_ is a physical register, or subIdx_ is set.
+ /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
bool flip();
/// isCoalescable - Return true if MI is a copy instruction that will become
@@ -211,32 +73,33 @@ namespace llvm {
bool isCoalescable(const MachineInstr*) const;
/// isPhys - Return true if DstReg is a physical register.
- bool isPhys() const { return !newRC_; }
+ bool isPhys() const { return !NewRC; }
- /// isPartial - Return true if the original copy instruction did not copy the
- /// full register, but was a subreg operation.
- bool isPartial() const { return partial_; }
+ /// isPartial - Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
- /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller register class than DstReg's.
- bool isCrossClass() const { return crossClass_; }
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
/// isFlipped - Return true when getSrcReg is the register being defined by
/// the original copy instruction.
- bool isFlipped() const { return flipped_; }
+ bool isFlipped() const { return Flipped; }
/// getDstReg - Return the register (virtual or physical) that will remain
/// after coalescing.
- unsigned getDstReg() const { return dstReg_; }
+ unsigned getDstReg() const { return DstReg; }
/// getSrcReg - Return the virtual register that will be coalesced away.
- unsigned getSrcReg() const { return srcReg_; }
+ unsigned getSrcReg() const { return SrcReg; }
/// getSubIdx - Return the subregister index in DstReg that SrcReg will be
/// coalesced into, or 0.
- unsigned getSubIdx() const { return subIdx_; }
+ unsigned getSubIdx() const { return SubIdx; }
/// getNewRC - Return the register class of the coalesced register.
- const TargetRegisterClass *getNewRC() const { return newRC_; } + const TargetRegisterClass *getNewRC() const { return NewRC; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 9e9a145..ca02aa1 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -206,6 +206,7 @@ void RegScavenger::forward() { break; } assert(SubUsed && "Using an undefined register!"); + (void)SubUsed; } assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && "Using an early clobbered register!"); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 21375b2..1e9b5c8 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -26,7 +26,7 @@ using namespace llvm; #ifndef NDEBUG -cl::opt<bool> StressSchedOpt( +static cl::opt<bool> StressSchedOpt( "stress-sched", cl::Hidden, cl::init(false), cl::desc("Stress test instruction scheduling")); #endif @@ -140,6 +140,7 @@ void SUnit::removePred(const SDep &D) { break; } assert(FoundSucc && "Mismatching preds / succs lists!"); + (void)FoundSucc; Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 446adfc..34b8ab0 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -36,7 +36,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineDominatorTree &mdt) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT), FirstDbgValue(0) { DbgValues.clear(); } @@ -134,6 +134,7 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, } void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { + LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) { MachineBasicBlock *Header = ML->getHeader(); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h index 8a4ea85..666bdf5 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h @@ -48,7 +48,8 @@ namespace llvm { /// VisitLoop - Clear out any previous state and analyze the given loop. /// void VisitLoop(const MachineLoop *Loop) { - Deps.clear(); + assert(Deps.empty() && "stale loop dependencies"); + MachineBasicBlock *Header = Loop->getHeader(); SmallSet<unsigned, 8> LoopLiveIns; for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), @@ -109,7 +110,7 @@ namespace llvm { /// initialized and destructed for each block. std::vector<std::vector<SUnit *> > Defs; std::vector<std::vector<SUnit *> > Uses; - + /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here /// to minimize construction/destruction. @@ -127,7 +128,7 @@ namespace llvm { protected: /// DbgValues - Remember instruction that preceeds DBG_VALUE. 
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > DbgValueVector; DbgValueVector DbgValues; MachineInstr *FirstDbgValue; diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 0e005d3..b80c01e 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -213,7 +213,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { freeUnits = freeUnit & (freeUnit - 1); } while (freeUnits); - assert(freeUnit && "No function unit available!"); if (IS->getReservationKind() == InstrStage::Required) RequiredScoreboard[cycle + i] |= freeUnit; else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4f0d2ca..7b87868 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -216,6 +216,7 @@ namespace { SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitMEMBARRIER(SDNode *N); @@ -1105,6 +1106,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::MEMBARRIER: return visitMEMBARRIER(N); } @@ -1526,12 +1528,6 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - // If both operands are null we know that carry out will always be false. - if (N0C && N0C->isNullValue() && N0 == N1) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), - MVT::Glue)); - // canonicalize constant to RHS if (N0C && !N1C) return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), @@ -3763,7 +3759,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4118,7 +4114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -4132,7 +4128,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); @@ -4348,7 +4344,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, @@ -4364,7 +4360,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, @@ -4532,7 +4528,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -4546,7 +4542,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); @@ -6479,7 +6475,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); - const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) return SDValue(); @@ -6542,7 +6538,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); - const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -6776,6 +6772,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); // If the inserted element is an UNDEF, just use the input vector. if (InVal.getOpcode() == ISD::UNDEF) @@ -6787,32 +6784,40 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return SDValue(); - // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new - // vector with the inserted element. - if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), - InVec.getNode()->op_end()); - if (Elt < Ops.size()) - Ops[Elt] = InVal; - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VT, &Ops[0], Ops.size()); - } - // If the invec is an UNDEF and if EltNo is a constant, create a new - // BUILD_VECTOR with undef elements and the inserted element. - if (InVec.getOpcode() == ISD::UNDEF && - isa<ConstantSDNode>(EltNo)) { - EVT EltVT = VT.getVectorElementType(); + // Check that we know which element is being inserted + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially + // be converted to a BUILD_VECTOR). Fill in the Ops vector with the + // vector elements. 
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- if (Elt < Ops.size())
- Ops[Elt] = InVal;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VT, &Ops[0], Ops.size());
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
}
- return SDValue();
+
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ VT, &Ops[0], Ops.size());
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
@@ -6896,7 +6901,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// If Idx was -1 above, Elt is going to be -1, so just return undef.
if (Elt == -1)
- return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+ return DAG.getUNDEF(LVT);
unsigned Align = LN0->getAlignment();
if (NewLoad) {
@@ -7028,6 +7033,36 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ // Handle only simple case where vector being inserted and vector
+ // being extracted are of same type, and are half size of larger vectors.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ //
+ SDValue InsIdx = N->getOperand(1);
+ SDValue ExtIdx = V->getOperand(2);
+
+ if (InsIdx == ExtIdx)
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -7447,7 +7482,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
- const Type *FPTy = Elts[0]->getType();
+ Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
// Create a ConstantArray of the two constants.
@@ -7465,10 +7500,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Cond = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), N0, N1, CC); + AddToWorkList(Cond.getNode()); SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), Cond, One, Zero); + AddToWorkList(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, CstOffset); + AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, Alignment); @@ -7553,7 +7591,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { + TLI.getBooleanContents(N0.getValueType().isVector()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 54a7d43..e8f8c73 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -66,17 +66,22 @@ using namespace llvm; void FastISel::startNewBlock() { LocalValueMap.clear(); - // Start out as null, meaining no local-value instructions have - // been emitted. - LastLocalValue = 0; + EmitStartPt = 0; - // Advance the last local value past any EH_LABEL instructions. + // Advance the emit start point past any EH_LABEL instructions. MachineBasicBlock::iterator I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { - LastLocalValue = I; + EmitStartPt = I; ++I; } + LastLocalValue = EmitStartPt; +} + +void FastISel::flushLocalValueMap() { + LocalValueMap.clear(); + LastLocalValue = EmitStartPt; + recomputeInsertPt(); } bool FastISel::hasTrivialKill(const Value *V) const { @@ -183,7 +188,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, APFloat::rmTowardZero, &isExact); if (isExact) { - APInt IntVal(IntBitWidth, 2, x); + APInt IntVal(IntBitWidth, x); unsigned IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), IntVal)); @@ -422,12 +427,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); - const Type *Ty = I->getOperand(0)->getType(); + Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, E = I->op_end(); OI != E; ++OI) { const Value *Idx = *OI; - if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + if (StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -489,7 +494,7 @@ bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. - if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) { + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { // Don't attempt to handle constraints. 
if (!IA->getConstraintString().empty()) return false; @@ -526,13 +531,10 @@ bool FastISel::SelectCall(const User *I) { unsigned Reg = 0; unsigned Offset = 0; if (const Argument *Arg = dyn_cast<Argument>(Address)) { - if (Arg->hasByValAttr()) { - // Byval arguments' frame index is recorded during argument lowering. - // Use this info directly. - Offset = FuncInfo.getByValArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); - } + // Some arguments' frame index is recorded during argument lowering. + Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) Reg = getRegForValue(Address); @@ -645,6 +647,16 @@ bool FastISel::SelectCall(const User *I) { } } + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + if (!isa<IntrinsicInst>(F)) + flushLocalValueMap(); + // An arbitrary call. Bail. return false; } @@ -839,7 +851,7 @@ FastISel::SelectExtractValue(const User *U) { return false; const Value *Op0 = EVI->getOperand(0); - const Type *AggTy = Op0->getType(); + Type *AggTy = Op0->getType(); // Get the base result register. unsigned ResultReg; @@ -1074,7 +1086,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, if (MaterialReg == 0) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. - const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d518b5d..b052740 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -78,7 +78,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { - const Type *Ty = AI->getAllocatedType(); + Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), @@ -216,7 +216,7 @@ unsigned FunctionLoweringInfo::CreateReg(EVT VT) { /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, Ty, ValueVTs); @@ -260,7 +260,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { /// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination /// register based on the LiveOutInfo of its operands. 
void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { - const Type *Ty = PN->getType(); + Type *Ty = PN->getType(); if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; @@ -351,20 +351,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { } } -/// setByValArgumentFrameIndex - Record frame index for the byval +/// setArgumentFrameIndex - Record frame index for the byval /// argument. This overrides previous frame index entry for this argument, /// if any. -void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, +void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, int FI) { - assert (A->hasByValAttr() && "Argument does not have byval attribute!"); ByValArgFrameIndexMap[A] = FI; } -/// getByValArgumentFrameIndex - Get frame index for the byval argument. +/// getArgumentFrameIndex - Get frame index for the byval argument. /// If the argument does not have any assigned frame index then 0 is /// returned. -int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { - assert (A->hasByValAttr() && "Argument does not have byval attribute!"); +int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { DenseMap<const Argument *, int>::iterator I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) @@ -454,3 +452,34 @@ void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, break; } } + +/// AddLandingPadInfo - Extract the exception handling information from the +/// landingpad instruction and add them to the specified machine module info. +void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, + MachineBasicBlock *MBB) { + MMI.addPersonality(MBB, + cast<Function>(I.getPersonalityFn()->stripPointerCasts())); + + if (I.isCleanup()) + MMI.addCleanup(MBB); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, + // but we need to do it this way because of how the DWARF EH emitter + // processes the clauses. + for (unsigned i = I.getNumClauses(); i != 0; --i) { + Value *Val = I.getClause(i - 1); + if (I.isCatch(i - 1)) { + MMI.addCatchTypeInfo(MBB, + dyn_cast<GlobalVariable>(Val->stripPointerCasts())); + } else { + // Add filters in a list. + Constant *CVal = cast<Constant>(Val); + SmallVector<const GlobalVariable*, 4> FilterList; + for (User::op_iterator + II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) + FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts())); + + MMI.addFilterTypeInfo(MBB, FilterList); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index f0f4743..2ff66f8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -30,6 +30,12 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; +/// MinRCSize - Smallest register class we allow when constraining virtual +/// registers. If satisfying all register class constraints would require +/// using a smaller register class, emit a COPY to a new virtual register +/// instead. +const unsigned MinRCSize = 4; + /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional glue operands (which do /// not go into the resulting MachineInstr). 
@@ -87,7 +93,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UI != E; ++UI) { SDNode *User = *UI; bool Match = true; - if (User->getOpcode() == ISD::CopyToReg && + if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); @@ -113,7 +119,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (!UseRC) UseRC = RC; else if (RC) { - const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC); + const TargetRegisterClass *ComRC = + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -139,7 +146,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { DstRC = TLI->getRegClassFor(VT); } - + // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. if (MatchReg && SrcRC->getCopyCost() < 0) { @@ -167,7 +174,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, return 0; SDNode *User = *Node->use_begin(); - if (User->getOpcode() == ISD::CopyToReg && + if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); @@ -202,7 +209,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; - if (User->getOpcode() == ISD::CopyToReg && + if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); @@ -280,15 +287,16 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create - // a new virtual register and copy the value into it. + // a new virtual register and copy the value into it, but first attempt to + // shrink VReg's register class within reason. For example, if VReg == GR32 + // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); - if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { + if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -326,7 +334,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the -/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// operand number (in the II) that we are adding. IIOpNum and II are used for /// assertions only. 
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, @@ -356,7 +364,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { int Offset = CP->getOffset(); unsigned Align = CP->getAlignment(); - const Type *Type = CP->getType(); + Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { Align = TM->getTargetData()->getPrefTypeAlignment(Type); @@ -365,7 +373,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, Align = TM->getTargetData()->getTypeAllocSize(Type); } } - + unsigned Idx; MachineConstantPool *MCP = MF->getConstantPool(); if (CP->isMachineConstantPoolEntry()) @@ -389,35 +397,44 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } } -/// getSuperRegisterRegClass - Returns the register class of a superreg A whose -/// "SubIdx"'th sub-register class is the specified register class and whose -/// type matches the specified type. -static const TargetRegisterClass* -getSuperRegisterRegClass(const TargetRegisterClass *TRC, - unsigned SubIdx, EVT VT) { - // Pick the register class of the superegister for this type - for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), - E = TRC->superregclasses_end(); I != E; ++I) - if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) - return *I; - assert(false && "Couldn't find the register class"); - return 0; +unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + EVT VT, DebugLoc DL) { + const TargetRegisterClass *VRC = MRI->getRegClass(VReg); + const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); + + // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg + // within reason. + if (RC && RC != VRC) + RC = MRI->constrainRegClass(VReg, RC, MinRCSize); + + // VReg has been adjusted. It can be used with SubIdx operands now. + if (RC) + return VReg; + + // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual + // register instead. + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); + assert(RC && "No legal register class for VT supports that SubIdx"); + unsigned NewReg = MRI->createVirtualRegister(RC); + BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) + .addReg(VReg); + return NewReg; } /// EmitSubregNode - Generate machine code for subreg nodes. /// -void InstrEmitter::EmitSubregNode(SDNode *Node, +void InstrEmitter::EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); - + // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; - if (User->getOpcode() == ISD::CopyToReg && + if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { @@ -426,12 +443,14 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } } } - + if (Opc == TargetOpcode::EXTRACT_SUBREG) { - // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no + // constraints on the %dst register, COPY can target all legal register + // classes. 
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0)); - // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); unsigned SrcReg, DstReg, DefSubIdx; @@ -443,62 +462,57 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // r1026 = extract_subreg r1025, 4 // to a copy // r1026 = copy r1024 - const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg); VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } else { - const TargetRegisterClass *TRC = MRI->getRegClass(VReg); - const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); - assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); - - // Figure out the register class to create for the destreg. - // Note that if we're going to directly use an existing register, - // it must be precisely the required class, and not a subclass - // thereof. - if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { - // Create the reg - assert(SRC && "Couldn't find source register class"); - VRBase = MRI->createVirtualRegister(SRC); - } + // VReg may not support a SubIdx sub-register, and we may need to + // constrain its register class or issue a COPY to a compatible register + // class. + VReg = ConstrainForSubReg(VReg, SubIdx, + Node->getOperand(0).getValueType(), + Node->getDebugLoc()); - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase); + // Create the destreg if it is missing. + if (VRBase == 0) + VRBase = MRI->createVirtualRegister(TRC); - // Add source, and subreg index - AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, - IsClone, IsCloned); - assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&& - "Cannot yet extract from physregs"); - MI->getOperand(1).setSubReg(SubIdx); - MBB->insert(InsertPos, MI); + // Create the extract_subreg machine instruction. + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); - unsigned SubReg = getVR(N1, VRBaseMap); unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); - - // Figure out the register class to create for the destreg. - // Note that if we're going to directly use an existing register, - // it must be precisely the required class, and not a subclass - // thereof. - if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { - // Create the reg - assert(SRC && "Couldn't find source register class"); + + // Figure out the register class to create for the destreg. It should be + // the largest legal register class supporting SubIdx sub-registers. + // RegisterCoalescer will constrain it further if it decides to eliminate + // the INSERT_SUBREG instruction. 
+ // + // %dst = INSERT_SUBREG %src, %sub, SubIdx + // + // is lowered by TwoAddressInstructionPass to: + // + // %dst = COPY %src + // %dst:SubIdx = COPY %sub + // + // There is no constraint on the %src register class. + // + const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0)); + SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); + assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); + + if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) VRBase = MRI->createVirtualRegister(SRC); - } // Create the insert_subreg or subreg_to_reg machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); - + // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { @@ -514,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MBB->insert(InsertPos, MI); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); - + SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. @@ -643,9 +657,9 @@ void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); - + // Handle subreg insert/extract specially - if (Opc == TargetOpcode::EXTRACT_SUBREG || + if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); @@ -667,7 +681,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; - + const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); @@ -712,12 +726,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Then mark unused registers as dead. MI->setPhysRegsDeadExcept(UsedRegs, *TRI); } - + // Add result register values for things that are defined by this // instruction. if (NumResults) CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); - + // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = II.getNumDefs() > NumResults; @@ -751,7 +765,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->addRegisterDead(Reg, TRI); } } - + // If the instruction has implicit defs and the node doesn't, mark the // implicit def as dead. If the node has any glue outputs, we don't do this // because we don't know what implicit defs are being used by glued nodes. @@ -761,6 +775,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, i != e; ++i) MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); } + + // Run post-isel target hook to adjust this instruction if needed. +#ifdef NDEBUG + if (II.hasPostISelHook()) +#endif + TLI->AdjustInstrPostInstrSelection(MI, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -788,7 +808,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); - + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. 
break; @@ -808,12 +828,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::EH_LABEL)).addSym(S); break; } - + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. - + // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::INLINEASM)); @@ -822,7 +842,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - + // Add the HasSideEffect and isAlignStack bits. int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> @@ -834,10 +854,10 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - + MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. - + switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: @@ -873,13 +893,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } } - + // Get the mdnode from the asm if it exists and add it to the instruction. SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); if (MD) MI->addOperand(MachineOperand::CreateMetadata(MD)); - + MBB->insert(InsertPos, MI); break; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 19fc044..c081f38 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -77,6 +77,12 @@ class InstrEmitter { DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); + /// ConstrainForSubReg - Try to constrain VReg to a register class that + /// supports SubIdx sub-registers. Emit a copy if that isn't possible. + /// Return the virtual register to use. + unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + EVT VT, DebugLoc DL); + /// EmitSubregNode - Generate machine code for subreg nodes. /// void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d06e2bd..63255ae 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -53,10 +53,15 @@ class SelectionDAGLegalize { // Libcall insertion helpers. - /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. - SmallVector<SDValue, 8> LastCALLSEQ; + SDValue LastCALLSEQ_END; + + /// IsLegalizingCall - This member is used *only* for purposes of providing + /// helpful assertions that a libcall isn't created while another call is + /// being legalized (which could lead to non-serialized call sequences). + bool IsLegalizingCall; /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. 
This @@ -149,15 +154,6 @@ private: void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); - - SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } - void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } - void pushLastCALLSEQ(SDValue s) { - LastCALLSEQ.push_back(s); - } - void popLastCALLSEQ() { - LastCALLSEQ.pop_back(); - } }; } @@ -199,7 +195,8 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - pushLastCALLSEQ(DAG.getEntryNode()); + LastCALLSEQ_END = DAG.getEntryNode(); + IsLegalizingCall = false; // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -227,15 +224,14 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - int next_depth = depth; + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. if (Node->getOpcode() == ISD::CALLSEQ_START) - next_depth = depth + 1; - if (Node->getOpcode() == ISD::CALLSEQ_END) { - assert(depth > 0 && "negative depth!"); - if (depth == 1) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) return Node; - else - next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -266,7 +262,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) return Result; } return 0; @@ -287,7 +283,6 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { case ISD::CALLSEQ_START: if (!nested) return Node; - Node = Node->getOperand(0).getNode(); nested--; break; case ISD::CALLSEQ_END: @@ -295,7 +290,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { break; } } - return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0; + return 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -365,7 +360,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, // smaller type. 
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { - const Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); VT = SVT; Extend = true; @@ -819,6 +814,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } + case ISD::ATOMIC_STORE: { + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(2).getValueType()); + break; + } case ISD::SELECT_CC: case ISD::SETCC: case ISD::BR_CC: { @@ -872,7 +872,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; - case ISD::TRAMPOLINE: + case ISD::INIT_TRAMPOLINE: + case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: // These operations lie about being legal: when they claim to be legal, @@ -912,12 +913,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); - // Branches tweak the chain to include LastCALLSEQ + // Branches tweak the chain to include LastCALLSEQ_END Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - getLastCALLSEQ()); + LastCALLSEQ_END); Ops[0] = LegalizeOp(Ops[0]); - setLastCALLSEQ(DAG.getEntryNode()); + LastCALLSEQ_END = DAG.getEntryNode(); break; case ISD::SHL: case ISD::SRL: @@ -989,6 +989,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { #endif assert(0 && "Do not know how to legalize this operator!"); + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + break; + } + case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { default: assert(0 && "This action is not supported yet!"); @@ -1006,7 +1031,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::CALLSEQ_START: { SDNode *CallEnd = FindCallEndFromCallStart(Node); - assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1023,9 +1047,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. 
- if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, getLastCALLSEQ()); + Tmp1, LastCALLSEQ_END); Tmp1 = LegalizeOp(Tmp1); } @@ -1046,29 +1070,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); // Note that we are selecting this call! - setLastCALLSEQ(SDValue(CallEnd, 0)); + LastCALLSEQ_END = SDValue(CallEnd, 0); + IsLegalizingCall = true; // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(getLastCALLSEQ()); + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); return Result; } case ISD::CALLSEQ_END: - { - SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); - - // If the CALLSEQ_START node hasn't been legalized first, legalize it. - // This will cause this node to be legalized as well as handling libcalls - // right. - if (getLastCALLSEQ().getNode() != Node) { - LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; } // Otherwise, the call start has been legalized and everything is going @@ -1096,8 +1116,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. - popLastCALLSEQ(); + IsLegalizingCall = false; // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); @@ -1124,7 +1145,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), @@ -1311,7 +1332,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); @@ -1491,7 +1512,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned store and the target doesn't support it, // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), @@ -1596,7 +1617,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned store and the target doesn't support it, // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), @@ -1611,82 +1632,101 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT WideScalarVT = Tmp3.getValueType().getScalarType(); EVT NarrowScalarVT = StVT.getScalarType(); - // The Store type is illegal, must scalarize the vector store. - SmallVector<SDValue, 8> Stores; - bool ScalarLegal = TLI.isTypeLegal(WideScalarVT); - if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + if (StVT.isVector()) { unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Tmp3.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + bool RegScalarLegal = TLI.isTypeLegal(RegSclVT); + bool MemScalarLegal = TLI.isTypeLegal(MemSclVT); + + // We need to expand this store. If the register element type + // is legal then we can scalarize the vector and use + // truncating stores. + if (RegScalarLegal) { + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx)); + + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // This scalar TruncStore may be illegal, but we legalize it + // later. + SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, Alignment); - unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - // Types smaller than 8 bits are promoted to 8 bits.
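The truncating-store expansion introduced above (the RegScalarLegal path) writes one lane at a time at consecutive byte offsets. As a rough sketch of the resulting memory effect, assuming a <4 x i32> register value stored as <4 x i16> in memory (Stride = 2 bytes):

    #include <cstdint>
    #include <cstring>

    // Hypothetical illustration: each register lane is truncated to the
    // narrower memory element type and written at offset Idx * Stride.
    void store_v4i32_as_v4i16(uint8_t *Mem, const uint32_t Val[4]) {
      const unsigned Stride = sizeof(uint16_t);
      for (unsigned Idx = 0; Idx < 4; ++Idx) {
        uint16_t Trunc = static_cast<uint16_t>(Val[Idx]); // truncating store of one lane
        std::memcpy(Mem + Idx * Stride, &Trunc, sizeof(Trunc));
      }
    }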
- ScalarSize = std::max<unsigned>(ScalarSize, 8); - // Store stride - unsigned Stride = ScalarSize/8; - assert(isPowerOf2_32(Stride) && "Stride must be a power of two"); - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - - - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + Stores.push_back(Store); + } - Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - // The Store type is illegal, must scalarize the vector store. - // However, the scalar type is illegal. Must bitcast the result - // and store it in smaller parts. - if (!TLI.isTypeLegal(StVT) && StVT.isVector()) { - unsigned WideNumElem = StVT.getVectorNumElements(); - unsigned Stride = NarrowScalarVT.getSizeInBits()/8; - - unsigned SizeRatio = - (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits()); - - EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT, - SizeRatio*WideNumElem); - - // Cast the wide elem vector to wider vec with smaller elem type. - // Example <2 x i64> -> <4 x i32> - Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); - - for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) { - // Extract elment i - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - // bump pointer. - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // Store if, this element is: - // - First element on big endian, or - // - Last element on little endian - if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) || - ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) { - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); + // The scalar register type is illegal. + // For example saving <2 x i64> -> <2 x i32> on a x86. + // In here we bitcast the value into a vector of smaller parts and + // save it using smaller scalars. + if (!RegScalarLegal && MemScalarLegal) { + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits()/8; + + unsigned SizeRatio = + (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits()); + + EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), + MemSclVT, + SizeRatio * NumElem); + + // Cast the wide elem vector to wider vec with smaller elem type. + // Example <2 x i64> -> <4 x i32> + Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); + + SmallVector<SDValue, 8> Stores; + for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) { + // Extract the Ith element. + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + // Bump pointer. 
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // Store if, this element is: + // - First element on big endian, or + // - Last element on little endian + if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) || + ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) { + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } + + assert(false && "Unable to legalize the vector trunc store!"); + }// is vector // TRUNCSTORE:i16 i32 -> STORE i16 @@ -1999,7 +2039,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); - const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is @@ -2106,7 +2146,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { } } else { assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); - const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); + Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } @@ -2150,6 +2190,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2159,7 +2200,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; @@ -2169,7 +2210,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. @@ -2185,7 +2226,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). 
LegalizeOp(CallInfo.second); return CallInfo.first; @@ -2210,7 +2251,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, @@ -2231,13 +2272,14 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; @@ -2248,7 +2290,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -2256,7 +2298,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2360,13 +2402,13 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue InChain = DAG.getEntryNode(); EVT RetVT = Node->getValueType(0); - const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; @@ -2397,7 +2439,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, LegalizeOp(CallInfo.second); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2955,8 +2997,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; + case ISD::ATOMIC_FENCE: case ISD::MEMBARRIER: { // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. 
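The fallback described in the comment above turns the barrier into an ordinary libcall. At the source level the effect is roughly the following sketch; the legalizer actually builds the call through LowerCallTo rather than emitting C code:

    // __sync_synchronize is the GCC-compatible full-barrier entry point the
    // legalizer targets when no native fence lowering exists.
    extern "C" void __sync_synchronize();

    void full_barrier_fallback() {
      __sync_synchronize(); // full memory fence via libcall
    }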
TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), @@ -2969,6 +3013,32 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(CallResult.second); break; } + case ISD::ATOMIC_LOAD: { + // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. + SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + // There is no libcall for atomic store; fake it with ATOMIC_SWAP. + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(1)); + break; + } // By default, atomic intrinsics are marked Legal and lowered. Targets // which don't support them directly, however, may want libcalls, in which // case they mark them Expand, and we get here. @@ -3727,8 +3797,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); - setLastCALLSEQ(DAG.getEntryNode()); + LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e6835d8..7c1cc69 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -55,6 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: @@ -107,6 +108,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return BitConvertToInteger(Op); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. 
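The ATOMIC_LOAD and ATOMIC_STORE expansions above rest on two simple equivalences between atomic operations. A sketch of the same idea with C++11 atomics (illustrative only; the legalizer works on ATOMIC_CMP_SWAP and ATOMIC_SWAP nodes):

    #include <atomic>

    // An atomic load can be faked with a compare-and-swap of 0 against 0: the
    // operation never changes the stored value, but it always reports it.
    int atomic_load_via_cas(std::atomic<int> &A) {
      int Expected = 0;
      A.compare_exchange_strong(Expected, 0); // on failure, Expected := current value
      return Expected;                        // either way, this is the loaded value
    }

    // An atomic store can be faked with an exchange whose old value is dropped.
    void atomic_store_via_swap(std::atomic<int> &A, int V) {
      (void)A.exchange(V);
    }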
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), @@ -827,11 +834,11 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to expand the result of this operator!"); - case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; @@ -879,10 +886,10 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); - Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, - &C.getRawData()[1])), NVT); - Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, - &C.getRawData()[0])), NVT); + Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])), + NVT); + Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])), + NVT); } void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, @@ -1201,7 +1208,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; - const uint64_t *Parts = 0; + ArrayRef<uint64_t> Parts; switch (SrcVT.getSimpleVT().SimpleTy) { default: @@ -1218,7 +1225,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + DAG.getConstantFP(APFloat(APInt(128, Parts)), MVT::ppcf128)); Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), Lo, Hi, DAG.getCondCode(ISD::SETLT)); @@ -1291,8 +1298,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); - EVT VT = NewLHS.getValueType(); - assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); + assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!"); // FIXME: This generated code sucks. We want to generate // FCMPU crN, hi1, hi2 @@ -1373,7 +1379,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APInt(128, 2, TwoE31)); + APFloat APF = APFloat(APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. 
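The constant-building changes above track an APInt API change: the old (bit-width, word-count, raw-pointer) constructor is replaced by a single-word form and an ArrayRef form. A minimal sketch, assuming the post-change headers:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    // Build a 64-bit value from one word, or a 128-bit value from an array of
    // 64-bit words, mirroring the APInt(integerPartWidth, C.getRawData()[i])
    // and APInt(128, Parts) calls in the hunks above.
    APInt oneWord(uint64_t W)            { return APInt(64, W); }
    APInt wide128(ArrayRef<uint64_t> Ws) { return APInt(128, Ws); }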
@@ -1445,6 +1451,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + (void)NVT; SDValue Lo, Hi; GetExpandedOp(ST->getValue(), Lo, Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e7c77dd..a5c4c2d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -48,6 +48,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to promote this operator!"); + case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break; case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; @@ -63,6 +64,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SHL: Res = PromoteIntRes_SHL(N); break; @@ -84,6 +86,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::CONCAT_VECTORS: + Res = PromoteIntRes_CONCAT_VECTORS(N); break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -114,6 +118,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULO: case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + case ISD::ATOMIC_LOAD: + Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; + case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: @@ -136,6 +143,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { SetPromotedInteger(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetPromotedInteger(Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { // Sign-extend the new bits, and continue the assertion. SDValue Op = SExtPromotedInteger(N->getOperand(0)); @@ -150,12 +163,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { Op.getValueType(), Op, N->getOperand(1)); } +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { + EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), ResVT, + N->getChain(), N->getBasePtr(), + N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getMemOperand()); + Op2, N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -167,7 +194,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op3 = GetPromotedInteger(N->getOperand(3)); SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getMemOperand()); + Op2, Op3, N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -457,6 +485,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { LHS.getValueType(), N->getOperand(0),LHS,RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { + SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), + LHS.getValueType(), Mask, LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(2)); SDValue RHS = GetPromotedInteger(N->getOperand(3)); @@ -467,16 +503,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); - assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + DebugLoc dl = N->getDebugLoc(); + assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), N->getOperand(1), N->getOperand(2)); - // Convert to the expected type. - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + // Convert to the expected type. 
return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); } @@ -707,6 +751,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::ATOMIC_STORE: + Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N)); + break; case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; @@ -721,6 +768,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; @@ -791,6 +839,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); } +SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), + N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), + N->getOrdering(), N->getSynchScope()); +} + SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { // This should only occur in unusual situations like bitcasting to an // x86_fp80, so just turn it into a store+load @@ -913,14 +968,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { - assert(OpNo == 0 && "Only know how to promote condition"); + assert(OpNo == 0 && "Only know how to promote the condition!"); + SDValue Cond = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); - SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); + EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ? 
+ OpTy.getScalarType() : OpTy); + Cond = PromoteTargetBoolean(Cond, SVT); - return SDValue(DAG.UpdateNodeOperands(N, Cond, - N->getOperand(1), N->getOperand(2)), 0); + return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), + N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { @@ -1024,7 +1082,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to expand the result of this operator!"); - case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; @@ -1055,6 +1113,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break; case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -1546,6 +1605,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2176,9 +2241,9 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - const Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); - const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); // A divide for UMULO should be faster than a function call. 
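The "divide for UMULO" remark above refers to the standard overflow check for an unsigned multiply: the product overflowed exactly when dividing it by one non-zero operand does not recover the other operand. A small sketch:

    #include <cstdint>

    // Returns true when A * B overflows 64 bits; Product receives the wrapped
    // result either way.
    bool umulo_u64(uint64_t A, uint64_t B, uint64_t &Product) {
      Product = A * B;                     // wrapping multiply
      return B != 0 && Product / B != A;   // divide-based overflow test
    }

This is the trade the expansion makes: one udiv plus a compare in place of a library call.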
@@ -2222,7 +2287,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { EVT ArgVT = N->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = true; @@ -2321,6 +2386,20 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, } } +void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDValue Zero = DAG.getConstant(0, VT); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, + N->getOperand(0), + N->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); +} //===----------------------------------------------------------------------===// // Integer Operand Expansion @@ -2365,6 +2444,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; case ISD::RETURNADDR: case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + + case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -2742,6 +2823,19 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { return MakeLibCall(LC, DstVT, &Op, 1, true, dl); } +SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), + N->getOperand(0), + N->getOperand(1), N->getOperand(2), + cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + return Swap.getValue(1); +} + + SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); @@ -2775,7 +2869,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { - ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -2830,6 +2923,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + + SDValue Op0 = N->getOperand(1); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType() && + "Invalid input vector types"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT OutElemTy = NOutVT.getVectorElementType(); + + unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); + unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumOutElem = NOutVT.getVectorNumElements(); + assert(NumElem0 + NumElem1 == NumOutElem && + "Invalid number of incoming elements"); + + // Take the elements from the first vector. 
+ SmallVector<SDValue, 8> Ops(NumOutElem); + for (unsigned i = 0; i < NumElem0; ++i) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + Op0.getValueType().getScalarType(), Op0, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } + + // Take the elements from the second vector + for (unsigned i = 0; i < NumElem1; ++i) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + Op1.getValueType().getScalarType(), Op1, + DAG.getIntPtrConstant(i)); + Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2838,14 +2971,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT NOutVTElem = NOutVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); - - SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - N->getOperand(0)); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(1)); - return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,NOutVT, - ConvertedVector, ConvElem, N->getOperand(2)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT, + V0, ConvElem, N->getOperand(2)); } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -2855,20 +2986,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); - return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); - + // EXTRACT_VECTOR_ELT can return types which are wider than the incoming + // element types. If this is the case then we need to expand the outgoing + // value and not truncate it. + return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + unsigned NumElems = N->getNumOperands(); EVT RetSclrTy = N->getValueType(0).getVectorElementType(); SmallVector<SDValue, 8> NewOps; + NewOps.reserve(NumElems); // For each incoming vector - for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx!= E; ++VecIdx) { + for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) { SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ba658b0..a4bb577 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -946,6 +946,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { return true; } +SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + if (i != ResNo) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + return SDValue(N, ResNo); +} + /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split into two not necessarily identical pieces. 
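The CONCAT_VECTORS promotion above extracts every lane of both inputs, any-extends each lane to the promoted element type, and rebuilds the result with BUILD_VECTOR. A value-level sketch, assuming two <4 x i8> inputs promoted into one <8 x i16> result (zero-extension stands in for ANY_EXTEND, whose upper bits are unspecified):

    #include <array>
    #include <cstdint>

    std::array<uint16_t, 8> concat_promote(const std::array<uint8_t, 4> &A,
                                           const std::array<uint8_t, 4> &B) {
      std::array<uint16_t, 8> Out{};
      for (unsigned i = 0; i < 4; ++i) Out[i]     = A[i]; // lanes of the first input
      for (unsigned i = 0; i < 4; ++i) Out[i + 4] = B[i]; // lanes of the second input
      return Out;
    }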
void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { @@ -1046,7 +1053,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, @@ -1067,7 +1074,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::ArgListEntry Entry; for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; @@ -1078,7 +1085,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -1093,24 +1100,8 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// type i1, the bits of which conform to getBooleanContents. SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { DebugLoc dl = Bool.getDebugLoc(); - ISD::NodeType ExtendCode; - switch (TLI.getBooleanContents()) { - default: - assert(false && "Unknown BooleanContent!"); - case TargetLowering::UndefinedBooleanContent: - // Extend to VT by adding rubbish bits. - ExtendCode = ISD::ANY_EXTEND; - break; - case TargetLowering::ZeroOrOneBooleanContent: - // Extend to VT by adding zero bits. - ExtendCode = ISD::ZERO_EXTEND; - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: { - // Extend to VT by copying the sign bit. - ExtendCode = ISD::SIGN_EXTEND; - break; - } - } + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); return DAG.getNode(ExtendCode, dl, VT, Bool); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 952797d..abacdac 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -148,15 +148,22 @@ private: SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); bool CustomWidenLowerNode(SDNode *N, EVT VT); + + /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES + /// node with the corresponding input operand, except for the result 'ResNo', + /// which is returned. 
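The PromoteTargetBoolean simplification above replaces an explicit switch with TargetLowering::getExtendForContent. The mapping it must preserve can be read off the switch being removed; here is a standalone mirror with local stand-in enums (the real helper is a TargetLowering member operating on the ISD and TargetLowering enums):

    enum BooleanContentKind { UndefinedBool, ZeroOrOneBool, ZeroOrNegativeOneBool };
    enum ExtendKind { AnyExtend, ZeroExtend, SignExtend };

    ExtendKind extendForContent(BooleanContentKind BC) {
      switch (BC) {
      case UndefinedBool:         return AnyExtend;  // rubbish upper bits are fine
      case ZeroOrOneBool:         return ZeroExtend; // upper bits must be zero
      case ZeroOrNegativeOneBool: return SignExtend; // copy the sign bit
      }
      return AnyExtend; // unreachable
    }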
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); - std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); @@ -206,8 +213,10 @@ private: // Integer Result Promotion. void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_AssertSext(SDNode *N); SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); @@ -215,6 +224,7 @@ private: SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); @@ -232,6 +242,7 @@ private: SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_SDIV(SDNode *N); SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); @@ -249,6 +260,7 @@ private: // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); SDValue PromoteIntOp_BITCAST(SDNode *N); SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); @@ -264,6 +276,7 @@ private: SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); @@ -289,6 +302,8 @@ private: // Integer Result Expansion. 
void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -320,6 +335,8 @@ private: void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -339,6 +356,7 @@ private: SDValue ExpandIntOp_TRUNCATE(SDNode *N); SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); SDValue ExpandIntOp_RETURNADDR(SDNode *N); + SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl); @@ -362,6 +380,7 @@ private: // Result Float to Integer Conversion. void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); @@ -488,6 +507,7 @@ private: // Vector Result Scalarization: <1 x ty> -> ty. void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); @@ -559,6 +579,7 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); //===--------------------------------------------------------------------===// @@ -581,6 +602,7 @@ private: // Widen Vector Result Promotion. void WidenVectorResult(SDNode *N, unsigned ResNo); + SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo); SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); @@ -677,7 +699,8 @@ private: void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); // Generic Result Splitting. - void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -699,6 +722,8 @@ private: } // Generic Result Expansion. 
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 85ea6b6..8e7e498 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -31,6 +31,11 @@ using namespace llvm; // These routines assume that the Lo/Hi part is stored first in memory on // little/big-endian machines, followed by the Hi/Lo part. This means that // they cannot be used as is on vectors, for which Lo is always stored first. +void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetExpandedOp(Op, Lo, Hi); +} void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); @@ -426,37 +431,34 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { // bytes; for integers and floats it is Lo first if and only if the machine is // little-endian). -void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi) { - // A MERGE_VALUES node can produce any number of values. We know that the - // first illegal one needs to be expanded into Lo/Hi. - unsigned i; - - // The string of legal results gets turned into input operands, which have - // the same type. - for (i = 0; isTypeLegal(N->getValueType(i)); ++i) - ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); - - // The first illegal result must be the one that needs to be expanded. - GetSplitOp(N->getOperand(i), Lo, Hi); - - // Legalize the rest of the results into the input operands whether they are - // legal or not. 
- unsigned e = N->getNumValues(); - for (++i; i != e; ++i) - ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetSplitOp(Op, Lo, Hi); } void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue LL, LH, RL, RH; + SDValue LL, LH, RL, RH, CL, CH; DebugLoc dl = N->getDebugLoc(); GetSplitOp(N->getOperand(1), LL, LH); GetSplitOp(N->getOperand(2), RL, RH); SDValue Cond = N->getOperand(0); - Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); - Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); + CL = CH = Cond; + if (Cond.getValueType().isVector()) { + assert(Cond.getValueType().getVectorElementType() == MVT::i1 && + "Condition legalized before result?"); + unsigned NumElements = Cond.getValueType().getVectorNumElements(); + EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); + CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getIntPtrConstant(0)); + CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getIntPtrConstant(NumElements / 2)); + } + + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH); } void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ffff10c..f815b00 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -61,6 +61,9 @@ class VectorLegalizer { // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); + // Implement vselect in terms of XOR, AND, OR when blend is not supported + // by the target. + SDValue ExpandVSELECT(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -157,8 +160,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::CTLZ: case ISD::CTPOP: case ISD::SELECT: + case ISD::VSELECT: case ISD::SELECT_CC: - case ISD::VSETCC: + case ISD::SETCC: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::TRUNCATE: @@ -210,11 +214,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::UINT_TO_FP) + if (Node->getOpcode() == ISD::VSELECT) + Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) Result = ExpandFNEG(Op); - else if (Node->getOpcode() == ISD::VSETCC) + else if (Node->getOpcode() == ISD::SETCC) Result = UnrollVSETCC(Op); else Result = DAG.UnrollVectorOp(Op.getNode()); @@ -256,9 +262,41 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } -SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { +SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { + // Implement VSELECT in terms of XOR, AND, OR + // on platforms which do not support blend natively. 
+ EVT VT = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + && "Invalid mask size"); + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); + return DAG.getNode(ISD::OR, DL, VT, Op1, Op2); +} +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { EVT VT = Op.getOperand(0).getValueType(); DebugLoc DL = Op.getDebugLoc(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index b5698f9..107a42b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -44,8 +44,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to scalarize the result of this operator!"); + report_fatal_error("Do not know how to scalarize the result of this " + "operator!\n"); + case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; @@ -62,8 +64,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; - case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; - case ISD::ANY_EXTEND: case ISD::CTLZ: case ISD::CTPOP: @@ -129,6 +129,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetScalarizedVector(Op); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), @@ -237,6 +243,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + + if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); DebugLoc DL = N->getDebugLoc(); @@ -259,35 +271,23 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); EVT NVT = N->getValueType(0).getVectorElementType(); - EVT SVT = TLI.getSetCCResultType(LHS.getValueType()); DebugLoc DL = N->getDebugLoc(); // Turn it into a scalar SETCC. - SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2)); - - // VSETCC always returns a sign-extended value, while SETCC may not. The - // SETCC result type may not match the vector element type. Correct these. - if (NVT.bitsLE(SVT)) { - // The SETCC result type is bigger than the vector element type. - // Ensure the SETCC result is sign-extended. - if (TLI.getBooleanContents() != - TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res, - DAG.getValueType(MVT::i1)); - // Truncate to the final type. - return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res); - } - - // The SETCC result type is smaller than the vector element type. - // If the SetCC result is not sign-extended, chop it down to MVT::i1. - if (TLI.getBooleanContents() != - TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res); - // Sign extend to the final type. - return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res); + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + return DAG.getNode(ExtendCode, DL, NVT, Res); } @@ -415,7 +415,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to split the result of this operator!"); - case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::VSELECT: case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; @@ -432,7 +433,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::SETCC: - case ISD::VSETCC: SplitVecRes_SETCC(N, Lo, Hi); break; case ISD::VECTOR_SHUFFLE: @@ -524,12 +524,11 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeWidenVector: break; case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: @@ -670,7 +669,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
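The reworked ScalarizeVecRes_VSETCC above emits an MVT::i1 compare and then widens it according to the target's boolean contents via getExtendForContent. A hedged sketch of that widening rule, using a stand-in enum rather than the TargetLowering one:

#include <cstdint>

enum BoolContents { ZeroOrOne, ZeroOrNegativeOne }; // stand-in, not the LLVM enum

// Widen a 1-bit compare result to a full lane: zero/one contents zero-extend,
// zero/negative-one contents sign-extend so a true lane becomes all ones.
int32_t widenBool(bool Cmp, BoolContents BC) {
  if (BC == ZeroOrNegativeOne)
    return Cmp ? -1 : 0; // 0xFFFFFFFF or 0x00000000
  return Cmp ? 1 : 0;
}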
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, @@ -740,6 +739,10 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, } void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + EVT LoVT, HiVT; DebugLoc DL = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); @@ -965,7 +968,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"; #endif llvm_unreachable("Do not know how to split this operator's operand!"); - + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -1163,6 +1166,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + // The result has a legal vector type, but the input needs splitting. + SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo0, Hi0); + GetSplitVector(N->getOperand(1), Lo1, Hi1); + unsigned PartElements = Lo0.getValueType().getVectorNumElements(); + EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); + EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); + return PromoteTargetBoolean(Con, N->getValueType(0)); +} + + SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { // The result has a legal vector type, but the input needs splitting. 
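The new SplitVecOp_VSETCC above compares the low and high halves separately and concatenates the two partial results before promoting the boolean vector. Roughly, in standalone form (hypothetical helper, not the DAG API):

#include <cstddef>
#include <vector>

// Compare element-wise in two halves (Lo0 < Lo1, Hi0 < Hi1) and concatenate,
// mirroring GetSplitVector + two SETCC nodes + CONCAT_VECTORS. Assumes both
// operands have the same, even length.
std::vector<bool> splitCompareLT(const std::vector<int> &A,
                                 const std::vector<int> &B) {
  std::vector<bool> Res(A.size());
  std::size_t Half = A.size() / 2;
  for (std::size_t i = 0; i < Half; ++i)        // low half
    Res[i] = A[i] < B[i];
  for (std::size_t i = Half; i < A.size(); ++i) // high half
    Res[i] = A[i] < B[i];
  return Res;
}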
EVT ResVT = N->getValueType(0); @@ -1205,6 +1228,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen the result of this operator!"); + case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; @@ -1222,10 +1246,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; - case ISD::VSETCC: - Res = WidenVecRes_VSETCC(N); - break; - case ISD::ADD: case ISD::AND: case ISD::BSWAP: @@ -1557,6 +1577,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } +SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { + SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); + return GetWidenedVector(WidenVec); +} + SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -1661,6 +1686,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumInElts = InVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. @@ -1686,17 +1712,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (N->getOperand(i).getOpcode() != ISD::UNDEF) break; - if (i > NumOperands) + if (i == NumOperands) // Everything but the first operand is an UNDEF so just return the // widened first operand. return GetWidenedVector(N->getOperand(0)); if (NumOperands == 2) { // Replace concat of two operands with a shuffle. - SmallVector<int, 16> MaskOps(WidenNumElts); - for (unsigned i=0; i < WidenNumElts/2; ++i) { + SmallVector<int, 16> MaskOps(WidenNumElts, -1); + for (unsigned i = 0; i < NumInElts; ++i) { MaskOps[i] = i; - MaskOps[i+WidenNumElts/2] = i+WidenNumElts; + MaskOps[i + NumInElts] = i + WidenNumElts; } return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), @@ -1708,7 +1734,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { // Fall back to use extracts and build vector. 
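The WidenVecRes_CONCAT_VECTORS fix above indexes the shuffle mask by the input element count and leaves the padding lanes undefined. A small sketch of the corrected mask construction, assuming two inputs of NumInElts elements each and 2*NumInElts <= WidenNumElts:

#include <vector>

// Build the shuffle mask for concatenating two NumInElts-wide operands into a
// WidenNumElts-wide result; unused tail lanes stay -1 (undefined), matching
// SmallVector<int, 16> MaskOps(WidenNumElts, -1) above.
std::vector<int> concatShuffleMask(unsigned NumInElts, unsigned WidenNumElts) {
  std::vector<int> MaskOps(WidenNumElts, -1);
  for (unsigned i = 0; i < NumInElts; ++i) {
    MaskOps[i] = static_cast<int>(i);                            // operand 0
    MaskOps[i + NumInElts] = static_cast<int>(i + WidenNumElts); // operand 1
  }
  return MaskOps;
}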
EVT EltVT = WidenVT.getVectorElementType(); - unsigned NumInElts = InVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Idx = 0; for (unsigned i=0; i < NumOperands; ++i) { @@ -1916,6 +1941,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1954,6 +1984,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operands must be vectors"); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1970,7 +2003,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InOp1.getValueType() == WidenInVT && InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); - return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + (void)WidenInVT; + return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, InOp1, InOp2, N->getOperand(2)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 12b1838..e757def 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -2621,6 +2621,39 @@ bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { return false; } +/// canClobberReachingPhysRegUse - True if SU would clobber one of it's +/// successor's explicit physregs whose definition can reach DepSU. +/// i.e. DepSU should not be scheduled above SU. +static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, + ScheduleDAGRRList *scheduleDAG, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + const unsigned *ImpDefs + = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); + if(!ImpDefs) + return false; + + for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); + SI != SE; ++SI) { + SUnit *SuccSU = SI->getSUnit(); + for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), + PE = SuccSU->Preds.end(); PI != PE; ++PI) { + if (!PI->isAssignedRegDep()) + continue; + + for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries a + // topological forward sort of the DAG (following the successors). + if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + } + } + } + return false; +} + /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's /// physical register defs. 
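canClobberReachingPhysRegUse above walks SU's successors and their register-carrying predecessor edges, asking whether an implicit def of SU overlaps such a register whose definition can reach DepSU. A heavily simplified, flattened sketch of that predicate (register overlap reduced to equality, reachability assumed precomputed):

#include <vector>

struct PhysRegUse {
  unsigned Reg;         // physreg carried on a pred edge of a successor
  bool DefReachesDepSU; // result of the IsReachable(DepSU, def) query
};

// True if any implicit def of SU clobbers a physreg use whose definition can
// reach DepSU, i.e. DepSU should not be scheduled above SU.
bool clobbersReachingPhysRegUse(const std::vector<unsigned> &ImpDefs,
                                const std::vector<PhysRegUse> &SuccUses) {
  for (unsigned Def : ImpDefs)
    for (const PhysRegUse &U : SuccUses)
      if (Def == U.Reg && U.DefReachesDepSU)
        return true;
  return false;
}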
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, @@ -2837,7 +2870,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::INSERT_SUBREG || SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; - if ((!canClobber(SuccSU, DUSU) || + if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && + (!canClobber(SuccSU, DUSU) || (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 35ea0bb..20bea8e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -403,7 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getAlignment()); ID.AddInteger(CP->getOffset()); if (CP->isMachineConstantPoolEntry()) - CP->getMachineCPVal()->AddSelectionDAGCSEId(ID); + CP->getMachineCPVal()->addSelectionDAGCSEId(ID); else ID.AddPointer(CP->getConstVal()); ID.AddInteger(CP->getTargetFlags()); @@ -432,7 +432,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::ATOMIC_LOAD_MIN: case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: { + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); @@ -769,11 +771,14 @@ static void VerifyNodeCommon(SDNode *N) { assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); EVT EltVT = N->getValueType(0).getVectorElementType(); - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { assert((I->getValueType() == EltVT || (EltVT.isInteger() && I->getValueType().isInteger() && EltVT.bitsLE(I->getValueType()))) && "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } break; } } @@ -821,7 +826,7 @@ static void VerifyMachineNode(SDNode *N) { /// given type. /// unsigned SelectionDAG::getEVTAlignment(EVT VT) const { - const Type *Ty = VT == MVT::iPTR ? + Type *Ty = VT == MVT::iPTR ? PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); @@ -876,6 +881,12 @@ void SelectionDAG::clear() { DbgInfo->clear(); } +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ANY_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : @@ -925,13 +936,25 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); - assert(Val.getBitWidth() == EltVT.getSizeInBits() && - "APInt size does not match type size!"); + const ConstantInt *Elt = &Val; + // In some cases the vector type is legal but the element type is illegal and + // needs to be promoted, for example v8i8 on ARM. In this case, promote the + // inserted value (the type does not need to match the vector element type). 
+ // Any extra bits introduced will be truncated away. + if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + TargetLowering::TypePromoteInteger) { + EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); + } + + assert(Elt->getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); - ID.AddPointer(&Val); + ID.AddPointer(Elt); void *IP = 0; SDNode *N = NULL; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) @@ -939,7 +962,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1131,7 +1154,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(Alignment); ID.AddInteger(Offset); - C->AddSelectionDAGCSEId(ID); + C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) @@ -1432,7 +1455,7 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); - const Type *Ty = VT.getTypeForEVT(*getContext()); + Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); @@ -1445,8 +1468,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSizeInBits(), VT2.getStoreSizeInBits())/8; - const Type *Ty1 = VT1.getTypeForEVT(*getContext()); - const Type *Ty2 = VT2.getTypeForEVT(*getContext()); + Type *Ty1 = VT1.getTypeForEVT(*getContext()); + Type *Ty2 = VT2.getTypeForEVT(*getContext()); const TargetData *TD = TLI.getTargetData(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1718,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent && - BitWidth > 1) + if (TLI.getBooleanContents(Op.getValueType().isVector()) == + TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; case ISD::SHL: @@ -2153,7 +2176,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. 
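Both ComputeMaskedBits and ComputeNumSignBits now ask getBooleanContents(isVector) so that vector and scalar compares can use different conventions. The underlying facts, as a simplified standalone sketch:

// For a 0/1 boolean of BitWidth bits, everything above bit 0 is known zero;
// for a 0/-1 boolean nothing above bit 0 is known zero, but every bit is a
// copy of the sign bit.
unsigned knownZeroHighBits(bool IsZeroOrOne, unsigned BitWidth) {
  return IsZeroOrOne ? BitWidth - 1 : 0;
}
unsigned minSignBits(bool IsZeroOrNegativeOne, unsigned BitWidth) {
  return IsZeroOrNegativeOne ? BitWidth : 1; // 1 is the conservative fallback
}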
- if (TLI.getBooleanContents() == + if (TLI.getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2437,7 +2460,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, APFloat::rmTowardZero, &ignored); if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; - APInt api(VT.getSizeInBits(), 2, x); + APInt api(VT.getSizeInBits(), x); return getConstant(api, VT); } case ISD::BITCAST: @@ -2777,6 +2800,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, EVT.getVectorNumElements() == VT.getVectorNumElements()) && "Vector element counts must match in FP_ROUND_INREG"); assert(EVT.bitsLE(VT) && "Not rounding down!"); + (void)EVT; if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. break; } @@ -2884,6 +2908,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); assert(!N1.getValueType().isVector() && !VT.isVector() && (N1.getValueType().isInteger() == VT.isInteger()) && + N1.getValueType() != VT && "Wrong types for EXTRACT_ELEMENT!"); // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding @@ -3425,7 +3450,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, return SDValue(); if (DstAlignCanChange) { - const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. @@ -3514,7 +3539,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, return SDValue(); if (DstAlignCanChange) { - const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. @@ -3589,7 +3614,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, return SDValue(); if (DstAlignCanChange) { - const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. @@ -3782,7 +3807,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. 
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3815,7 +3840,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, - unsigned Alignment) { + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -3823,18 +3850,23 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; // For now, atomics are considered to be volatile always. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, + Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, - SDValue Swp, MachineMemOperand *MMO) { + SDValue Swp, MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -3851,7 +3883,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, return SDValue(E, 0); } SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO); + Ptr, Cmp, Swp, MMO, Ordering, + SynchScope); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3861,27 +3894,39 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, - unsigned Alignment) { + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // A monotonic store does not load; a release store "loads" in the sense + // that other stores cannot be sunk past it. + // (An atomicrmw obviously both loads and stores.) + unsigned Flags = MachineMemOperand::MOStore; + if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) + Flags |= MachineMemOperand::MOLoad; // For now, atomics are considered to be volatile always. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. 
Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO, + Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -3892,12 +3937,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX || - Opcode == ISD::ATOMIC_SWAP) && + Opcode == ISD::ATOMIC_SWAP || + Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); EVT VT = Val.getValueType(); - SDVTList VTs = getVTList(VT, MVT::Other); + SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : + getVTList(VT, MVT::Other); FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; @@ -3908,7 +3955,63 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, return SDValue(E, 0); } SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Val, MMO); + Ptr, Val, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + EVT VT, SDValue Chain, + SDValue Ptr, + const Value* PtrVal, + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + // A monotonic load does not store; an acquire load "stores" in the sense + // that other loads cannot be hoisted past it. + unsigned Flags = MachineMemOperand::MOLoad; + if (Ordering > Monotonic) + Flags |= MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. 
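The getAtomic overloads above derive the MachineMemOperand flags from the operation and its ordering: a monotonic atomic store needs no MOLoad, a release (or stronger) store gets one so other stores cannot be sunk past it, and symmetrically an acquire (or stronger) load also gets MOStore. Sketched with simplified stand-in enums:

enum Ord { Monotonic, Acquire, Release, AcquireRelease, SeqCst }; // stand-ins
enum MemFlag { MOLoad = 1, MOStore = 2 };

unsigned atomicStoreFlags(Ord O) {
  unsigned Flags = MOStore;
  if (O > Monotonic)  // release and stronger also "load":
    Flags |= MOLoad;  // other stores may not be sunk past it
  return Flags;
}
unsigned atomicLoadFlags(Ord O) {
  unsigned Flags = MOLoad;
  if (O > Monotonic)  // acquire and stronger also "store":
    Flags |= MOStore; // other loads may not be hoisted past it
  return Flags;
}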
+ Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + EVT VT, SDValue Chain, + SDValue Ptr, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, MMO, Ordering, SynchScope); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -5769,6 +5872,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { #endif case ISD::PREFETCH: return "Prefetch"; case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; @@ -5781,6 +5885,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; @@ -5896,8 +6002,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; - case ISD::VSETCC: return "vsetcc"; case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; @@ -5985,7 +6091,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTLZ: return "ctlz"; // Trampolines - case ISD::TRAMPOLINE: return "trampoline"; + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; case ISD::CONDCODE: switch (cast<CondCodeSDNode>(this)->get()) { @@ -6245,8 +6352,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, const SelectionDAG *G, unsigned depth, - unsigned indent) -{ + unsigned indent) { if (depth == 0) return; @@ -6340,6 +6446,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, &Operands[0], Operands.size())); break; + case ISD::VSELECT: + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, + &Operands[0], Operands.size())); + break; case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -6427,6 +6537,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { Align = TD->getPreferredAlignment(GVar); } } + if (!Align) + Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); } return MinAlign(Align, GVOffset); } @@ -6528,7 +6640,7 @@ unsigned 
GlobalAddressSDNode::getAddressSpace() const { } -const Type *ConstantPoolSDNode::getType() const { +Type *ConstantPoolSDNode::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 81b03ee..7ed46a6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -578,7 +578,7 @@ namespace { : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, const Type *Ty) { + unsigned Reg, Type *Ty) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { @@ -788,6 +788,18 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + else if (!Regs.empty() && + TargetRegisterInfo::isVirtualRegister(Regs.front())) { + // Put the register class of the virtual registers in the flag word. That + // way, later passes can recompute register class constraints for inline + // assembly as well as normal instructions. + // Don't do this for tied operands that can use the regclass information + // from the def. + const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); @@ -805,6 +817,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; GFI = gfi; TD = DAG.getTarget().getTargetData(); + LPadToCallSiteMap.clear(); } /// clear - Clear out the current SelectionDAG and the associated @@ -956,7 +969,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } -// getValue - Return an SDValue for the given Value. +/// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. 
It's important // to do this first, so that we don't create a CopyFromReg if we already @@ -971,7 +984,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); resolveDanglingDebugInfo(V, N); return N; } @@ -1069,7 +1082,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) return DAG.getBlockAddress(BA, VT); - const VectorType *VecTy = cast<VectorType>(V->getType()); + VectorType *VecTy = cast<VectorType>(V->getType()); unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of @@ -1277,15 +1290,17 @@ uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; - BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); - BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); return BPI->getEdgeWeight(SrcBB, DstBB); } -void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { - uint32_t weight = getEdgeWeight(Src, Dst); - Src->addSuccessor(Dst, weight); +void SelectionDAGBuilder:: +addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, + uint32_t Weight /* = 0 */) { + if (!Weight) + Weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, Weight); } @@ -1558,8 +1573,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - addSuccessorWithWeight(SwitchBB, CB.TrueBB); - addSuccessorWithWeight(SwitchBB, CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1677,7 +1692,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) - if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { + if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. // Just use pointer type, it's guaranteed to fit. UsePtrType = true; @@ -1808,6 +1823,49 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { } +void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); +} + +void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { + assert(FuncInfo.MBB->isLandingPad() && + "Call to landingpad not in landing pad!"); + + MachineBasicBlock *MBB = FuncInfo.MBB; + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + AddLandingPadInfo(LP, MMI, MBB); + + SmallVector<EVT, 2> ValueVTs; + ComputeValueVTs(TLI, LP.getType(), ValueVTs); + + // Insert the EXCEPTIONADDR instruction. 
+ assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = DAG.getRoot(); + SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1); + SDValue Chain = Op1.getValue(1); + + // Insert the EHSELECTION instruction. + VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + Ops[0] = Op1; + Ops[1] = Chain; + SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2); + Chain = Op2.getValue(1); + Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32); + + Ops[0] = Op1; + Ops[1] = Op2; + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Ops[0], 2); + + std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain); + setValue(&LP, RetPair.first); + DAG.setRoot(RetPair.second); +} + /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, @@ -1866,8 +1924,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - SwitchBB->addSuccessor(Small.BB); - SwitchBB->addSuccessor(Default); + addSuccessorWithWeight(SwitchBB, Small.BB); + addSuccessorWithWeight(SwitchBB, Default); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1923,7 +1981,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } - CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + + uint32_t ExtraWeight = I->ExtraWeight; + CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, + /* me */ CurBlock, + /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. 
Otherwise, push the CaseBlock onto the @@ -1953,10 +2015,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) { } /// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, +bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, + CaseRecVector &WorkList, + const Value *SV, + MachineBasicBlock *Default, MachineBasicBlock *SwitchBB) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1965,8 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); if (!areJTsAllowed(TLI) || TSize.ult(4)) @@ -2044,7 +2105,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, visitJumpTableHeader(JT, JTH, SwitchBB); JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; } @@ -2318,12 +2378,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { size_t numCmps = 0; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + BasicBlock *SuccBB = SI.getSuccessor(i); + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; + + uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; + Cases.push_back(Case(SI.getSuccessorValue(i), SI.getSuccessorValue(i), - SMBB)); + SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2343,6 +2408,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { I->High = J->High; J = Cases.erase(J); + + if (BranchProbabilityInfo *BPI = FuncInfo.BPI) { + uint32_t CurWeight = currentBB->getBasicBlock() ? + BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16; + uint32_t NextWeight = nextBB->getBasicBlock() ? + BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16; + + BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(), + CurWeight + NextWeight); + } } else { I = J++; } @@ -2379,7 +2454,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumOperands() == 2) { + if (SI.getNumCases() == 1) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2399,12 +2474,12 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { size_t numCmps = Clusterify(Cases, SI); DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << '\n'); - numCmps = 0; + (void)numCmps; // Get the Value to be switched on and default basic blocks, which will be // inserted into CaseBlock records, representing basic blocks in the binary // search tree. 
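Clusterify above now records a branch weight per case and, when two adjacent cases share a destination, folds them into one range and adds their weights. A compact sketch of that merge, with a simplified Cluster record standing in for the real Case type:

#include <cstdint>
#include <vector>

struct Cluster { int64_t Low, High; unsigned DestBB; uint32_t Weight; };

// Merge adjacent, already-sorted cases that jump to the same block into a
// single [Low, High] range whose weight is the sum of the merged edges.
std::vector<Cluster> clusterify(const std::vector<Cluster> &Cases) {
  std::vector<Cluster> Out;
  for (const Cluster &C : Cases) {
    if (!Out.empty() && Out.back().DestBB == C.DestBB &&
        C.Low == Out.back().High + 1) {
      Out.back().High = C.High;      // extend the existing range
      Out.back().Weight += C.Weight; // CurWeight + NextWeight
    } else {
      Out.push_back(C);
    }
  }
  return Out;
}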
- const Value *SV = SI.getOperand(0); + const Value *SV = SI.getCondition(); // Push the initial CaseRec onto the worklist CaseRecVector WorkList; @@ -2458,7 +2533,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (isa<Constant>(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); @@ -2562,10 +2637,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue Cond = getValue(I.getOperand(0)); SDValue TrueVal = getValue(I.getOperand(1)); SDValue FalseVal = getValue(I.getOperand(2)); + ISD::NodeType OpCode = Cond.getValueType().isVector() ? + ISD::VSELECT : ISD::SELECT; for (unsigned i = 0; i != NumValues; ++i) - Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), + Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), + TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), Cond, SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), @@ -2778,7 +2855,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. - int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; + int MinRange[2] = { static_cast<int>(SrcNumElts+1), + static_cast<int>(SrcNumElts+1)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { @@ -2886,8 +2964,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); - const Type *AggTy = I.getType(); - const Type *ValTy = Op1->getType(); + Type *AggTy = I.getType(); + Type *ValTy = Op1->getType(); bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); @@ -2927,8 +3005,8 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); - const Type *AggTy = Op0->getType(); - const Type *ValTy = I.getType(); + Type *AggTy = Op0->getType(); + Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); @@ -2961,12 +3039,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; - if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + if (StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -3037,7 +3115,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. 
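visitGetElementPtr above folds each index into either a constant struct-field offset or an index scaled by the element size. The address arithmetic it lowers to, in a trivial standalone form (hypothetical GEPStep record, not an LLVM type):

#include <cstdint>
#include <vector>

struct GEPStep {
  bool IsField;         // struct field vs. array/vector index
  uint64_t FieldOffset; // byte offset of the field (when IsField)
  int64_t Index;        // runtime index (when !IsField)
  uint64_t ElemSize;    // allocation size of the indexed element type
};

// Accumulate the byte offsets a GEP contributes to its base pointer.
uint64_t gepAddress(uint64_t Base, const std::vector<GEPStep> &Steps) {
  uint64_t Addr = Base;
  for (const GEPStep &S : Steps)
    Addr += S.IsField ? S.FieldOffset
                      : static_cast<uint64_t>(S.Index) * S.ElemSize;
  return Addr;
}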
- const Type *Ty = I.getAllocatedType(); + Type *Ty = I.getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), @@ -3084,10 +3162,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { + if (I.isAtomic()) + return visitAtomicLoad(I); + const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); - const Type *Ty = I.getType(); + Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; @@ -3161,6 +3242,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStore(const StoreInst &I) { + if (I.isAtomic()) + return visitAtomicStore(I); + const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); @@ -3211,6 +3295,179 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, + SynchronizationScope Scope, + bool Before, DebugLoc dl, + SelectionDAG &DAG, + const TargetLowering &TLI) { + // Fence, if necessary + if (Before) { + if (Order == AcquireRelease || Order == SequentiallyConsistent) + Order = Release; + else if (Order == Acquire || Order == Monotonic) + return Chain; + } else { + if (Order == AcquireRelease) + Order = Acquire; + else if (Order == Release || Order == Monotonic) + return Chain; + } + SDValue Ops[3]; + Ops[0] = Chain; + Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); +} + +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, + getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), + MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? 
Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + DebugLoc dl = getCurDebugLoc(); + ISD::NodeType NT; + switch (I.getOperation()) { + default: llvm_unreachable("Unknown atomicrmw operation"); return; + case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; + case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; + case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; + case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; + case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; + case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; + case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; + case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; + case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; + case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; + case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + } + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(NT, dl, + getValue(I.getValOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValOperand()), + I.getPointerOperand(), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitFence(const FenceInst &I) { + DebugLoc dl = getCurDebugLoc(); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); +} + +void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + EVT VT = EVT::getEVT(I.getType()); + + if (I.getAlignment() * 8 < VT.getSizeInBits()) + report_fatal_error("Cannot generate unaligned atomic load"); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), + I.getPointerOperand(), I.getAlignment(), + TLI.getInsertFencesForAtomic() ? 
Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { + DebugLoc dl = getCurDebugLoc(); + + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + + if (I.getAlignment() * 8 < VT.getSizeInBits()) + report_fatal_error("Cannot generate unaligned atomic store"); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue OutChain = + DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValueOperand()), + I.getPointerOperand(), I.getAlignment(), + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + DAG.setRoot(OutChain); +} + /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, @@ -3290,7 +3547,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } if (!I.getType()->isVoidTy()) { - if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); } @@ -3337,25 +3594,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -/// Inlined utility function to implement binary input atomic intrinsics for -/// visitIntrinsicCall: I is a call instruction -/// Op is the associated NodeType for I -const char * -SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, - ISD::NodeType Op) { - SDValue Root = getRoot(); - SDValue L = - DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), - Root, - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - I.getArgOperand(0)); - setValue(&I, L); - DAG.setRoot(L.getValue(1)); - return 0; -} - // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { @@ -4154,17 +4392,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, return false; unsigned Reg = 0; - if (Arg->hasByValAttr()) { - // Byval arguments' frame index is recorded during argument lowering. - // Use this info directly. - Reg = TRI->getFrameRegister(MF); - Offset = FuncInfo.getByValArgumentFrameIndex(Arg); - // If byval argument ofset is not recorded then ignore this. - if (!Offset) - Reg = 0; - } + // Some arguments' frame index is recorded during argument lowering. 
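The atomic lowering above (visitAtomicCmpXchg, visitAtomicRMW, visitAtomicLoad, visitAtomicStore) routes through InsertFenceForAtomic when the target asks for explicit fences and demotes the memory operation itself to monotonic. Which fence each side gets can be summarised in a small sketch; Monotonic doubles as "no fence" here, whereas the real helper simply returns the chain unchanged:

enum Ord { Monotonic, Acquire, Release, AcquireRelease, SeqCst }; // stand-ins

// Fence ordering to emit before (Before = true) or after an atomic operation.
Ord fenceFor(Ord O, bool Before) {
  if (Before) {
    if (O == AcquireRelease || O == SeqCst) return Release;
    if (O == Release) return Release;
    return Monotonic;                      // acquire/monotonic: no fence before
  }
  if (O == AcquireRelease) return Acquire;
  if (O == Acquire || O == SeqCst) return O;
  return Monotonic;                        // release/monotonic: no fence after
}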
+ Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI->getFrameRegister(MF); - if (N.getNode()) { + if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); else @@ -4295,7 +4528,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(DI.getVariable()).Verify()) + if (!Address || !DIVariable(Variable).Verify()) return 0; // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder @@ -4385,7 +4618,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // debug info exists. ++SDNodeOrder; SDDbgValue *SDV; - if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, 0, false); } else { @@ -4514,9 +4747,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setCurrentCallSite(CI->getZExtValue()); return 0; } + case Intrinsic::eh_sjlj_functioncontext: { + // Get and store the index of the function context. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + AllocaInst *FnCtx = + cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); + int FI = FuncInfo.StaticAllocaMap[FnCtx]; + MFI->setFunctionContextIndex(FI); + return 0; + } case Intrinsic::eh_sjlj_setjmp: { - setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), - getValue(I.getArgOperand(0)))); + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, + DAG.getVTList(MVT::i32, MVT::Other), + Ops, 2); + setValue(&I, Op.getValue(0)); + DAG.setRoot(Op.getValue(1)); return 0; } case Intrinsic::eh_sjlj_longjmp: { @@ -4778,12 +5026,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::TRAMPOLINE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), - Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); - setValue(&I, Res); - DAG.setRoot(Res.getValue(1)); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, + TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::gcroot: @@ -4857,51 +5108,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } - case Intrinsic::memory_barrier: { - SDValue Ops[6]; - Ops[0] = getRoot(); - for (int x = 1; x < 6; ++x) - Ops[x] = getValue(I.getArgOperand(x - 1)); - - DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); - return 0; - } - case Intrinsic::atomic_cmp_swap: { - SDValue Root = getRoot(); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), - getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), - Root, - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)), - MachinePointerInfo(I.getArgOperand(0))); - setValue(&I, L); - DAG.setRoot(L.getValue(1)); - return 0; - } - case Intrinsic::atomic_load_add: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); - case Intrinsic::atomic_load_sub: - return 
implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); - case Intrinsic::atomic_load_or: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); - case Intrinsic::atomic_load_xor: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); - case Intrinsic::atomic_load_and: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); - case Intrinsic::atomic_load_nand: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); - case Intrinsic::atomic_load_max: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); - case Intrinsic::atomic_load_min: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); - case Intrinsic::atomic_load_umin: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); - case Intrinsic::atomic_load_umax: - return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); - case Intrinsic::atomic_swap: - return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); case Intrinsic::invariant_start: case Intrinsic::lifetime_start: @@ -4918,9 +5124,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { - const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - const Type *RetTy = FTy->getReturnType(); + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = 0; @@ -4949,7 +5155,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; @@ -4997,6 +5203,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); } @@ -5037,7 +5245,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // The instruction result is the result of loading from the // hidden sret parameter. SmallVector<EVT, 1> PVTs; - const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); @@ -5130,7 +5338,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, - const Type *LoadTy, + Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. 
if the @@ -5193,7 +5401,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; - const Type *LoadTy; + Type *LoadTy; switch (Size->getZExtValue()) { default: LoadVT = MVT::Other; @@ -5261,14 +5469,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // See if any floating point values are being passed to this function. This is // used to emit an undefined reference to fltused on Windows. - const FunctionType *FT = + FunctionType *FT = cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (FT->isVarArg() && !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - const Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<const Type*> i = po_begin(T), e = po_end(T); + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); i != e; ++i) { if (!i->isFloatingPointTy()) continue; MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); @@ -5412,20 +5620,20 @@ public: if (isa<BasicBlock>(CallOperandVal)) return TLI.getPointerTy(); - const llvm::Type *OpTy = CallOperandVal->getType(); + llvm::Type *OpTy = CallOperandVal->getType(); // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { - const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. - if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (StructType *STy = dyn_cast<StructType>(OpTy)) if (STy->getNumElements() == 1) OpTy = STy->getElementType(0); @@ -5637,9 +5845,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(!CS.getType()->isVoidTy() && - "Bad inline asm!"); - if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); @@ -5707,9 +5914,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5750,7 +5959,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } else { // Otherwise, create a stack slot and emit a store to it before the // asm. 
- const Type *Ty = OpVal->getType(); + Type *Ty = OpVal->getType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); @@ -6111,7 +6320,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> -TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, +TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, @@ -6128,7 +6337,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); + Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -6145,8 +6354,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setSRet(); if (Args[i].isByVal) { Flags.setByVal(); - const PointerType *Ty = cast<PointerType>(Args[i].Ty); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(Args[i].Ty); + Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. @@ -6356,7 +6565,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); @@ -6371,8 +6580,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { Flags.setSRet(); if (F.paramHasAttr(Idx, Attribute::ByVal)) { Flags.setByVal(); - const PointerType *Ty = cast<PointerType>(I->getType()); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(I->getType()); + Type *ElementTy = Ty->getElementType(); Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. @@ -6487,15 +6696,22 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (ArgValues.empty()) continue; - // Note down frame index for byval arguments. - if (I->hasByValAttr()) - if (FrameIndexSDNode *FI = - dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) - FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); + // Note down frame index. 
+ if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, SDB->getCurDebugLoc()); + SDB->setValue(I, Res); + if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (LoadSDNode *LNode = + dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index a0884eb..0a21ca3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -132,10 +132,13 @@ private: Constant* Low; Constant* High; MachineBasicBlock* BB; + uint32_t ExtraWeight; + + Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb, + uint32_t extraweight) : Low(low), High(high), BB(bb), + ExtraWeight(extraweight) { } - Case() : Low(0), High(0), BB(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb) : - Low(low), High(high), BB(bb) { } APInt size() const { const APInt &rHigh = cast<ConstantInt>(High)->getValue(); const APInt &rLow = cast<ConstantInt>(Low)->getValue(); @@ -203,20 +206,30 @@ private: CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, const Value *cmpmiddle, MachineBasicBlock *truebb, MachineBasicBlock *falsebb, - MachineBasicBlock *me) + MachineBasicBlock *me, + uint32_t trueweight = 0, uint32_t falseweight = 0) : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {} + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), + TrueWeight(trueweight), FalseWeight(falseweight) { } + // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. // Emit by default LHS op RHS. MHS is used for range comparisons: // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). const Value *CmpLHS, *CmpMHS, *CmpRHS; + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. MachineBasicBlock *TrueBB, *FalseBB; + // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; + + // TrueWeight/FalseWeight - branch weights. + uint32_t TrueWeight, FalseWeight; }; + struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} @@ -307,6 +320,9 @@ public: /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; + /// LPadToCallSiteMap - Map a landing pad to the call site indexes. + DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap; + /// HasTailCall - This is set to true if a call in the current /// block has been translated as a tail call. In this case, /// no subsequent DAG nodes should be created. 
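The LPadToCallSiteMap member introduced in this header hunk is filled in by the LowerCallTo change earlier in the patch (LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex)) and drained in PrepareEHLandingPad further down, where the collected indexes are handed to MachineModuleInfo::setCallSiteLandingPad. A minimal standalone sketch of that producer/consumer pattern, using hypothetical Block/RecordCallSite/EmitLandingPad stand-ins rather than the real SelectionDAG types:

#include <cstdio>
#include <map>
#include <vector>

// Stand-in for a MachineBasicBlock; only the block number matters here.
struct Block { int Number; };

// One vector of call-site indexes per landing pad, mirroring
// DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap.
static std::map<Block *, std::vector<unsigned> > LPadToCallSiteMap;

// Producer: while lowering an invoke, remember which call-site index
// unwinds to which landing pad.
static void RecordCallSite(Block *LandingPad, unsigned CallSiteIndex) {
  LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
}

// Consumer: when the landing pad is emitted, associate every recorded call
// site with it (the real code passes the list to setCallSiteLandingPad).
static void EmitLandingPad(Block *LandingPad) {
  const std::vector<unsigned> &Sites = LPadToCallSiteMap[LandingPad];
  for (unsigned i = 0, e = Sites.size(); i != e; ++i)
    std::printf("landing pad bb%d handles call site %u\n",
                LandingPad->Number, Sites[i]);
}

int main() {
  Block LPad = { 7 };
  RecordCallSite(&LPad, 1); // first invoke unwinding to bb7
  RecordCallSite(&LPad, 2); // second invoke unwinding to bb7
  EmitLandingPad(&LPad);
  return 0;
}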
@@ -436,7 +452,8 @@ private: MachineBasicBlock *SwitchBB); uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); - void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, + uint32_t Weight = 0); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -453,6 +470,7 @@ public: private: // These all get lowered before this pass. void visitInvoke(const InvokeInst &I); + void visitResume(const ResumeInst &I); void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); @@ -497,6 +515,7 @@ private: void visitExtractValue(const ExtractValueInst &I); void visitInsertValue(const InsertValueInst &I); + void visitLandingPad(const LandingPadInst &I); void visitGetElementPtr(const User &I); void visitSelect(const User &I); @@ -504,10 +523,15 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); + void visitAtomicRMW(const AtomicRMWInst &I); + void visitFence(const FenceInst &I); void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); - + void visitAtomicLoad(const LoadInst &I); + void visitAtomicStore(const StoreInst &I); + void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); @@ -531,7 +555,6 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op); const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 87bb296..68b9146 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -177,6 +177,13 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return 0; } +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + assert(!MI->getDesc().hasPostISelHook() && + "If a target marks an instruction with 'hasPostISelHook', " + "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); +} + //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// @@ -463,6 +470,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { GroupName = "Instruction Selection and Scheduling"; std::string BlockName; int BlockNumber = -1; + (void)BlockNumber; #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || @@ -677,21 +685,26 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. void SelectionDAGISel::PrepareEHLandingPad() { + MachineBasicBlock *MBB = FuncInfo->MBB; + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. 
- MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); + MCSymbol *Label = MF->getMMI().addLandingPad(MBB); + // Assign the call site to the landing pad's begin label. + MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); - BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) FuncInfo->MBB->addLiveIn(Reg); + if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo->MBB->addLiveIn(Reg); + if (Reg) MBB->addLiveIn(Reg); // FIXME: Hack around an exception handling flaw (PR1508): the personality // function and list of typeids logically belong to the invoke (or, if you @@ -704,7 +717,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // in exceptions not being caught because no typeids are associated with // the invoke. This may not be the only way things can go wrong, but it // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -719,8 +732,6 @@ void SelectionDAGISel::PrepareEHLandingPad() { } } - - /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified /// load into the specified FoldInst. Note that we could have a sequence where /// multiple LLVM IR instructions are folded into the same machineinstr. For @@ -741,7 +752,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, // isn't one of the folded instructions, then we can't succeed here. Handle // this by scanning the single-use users of the load until we get to FoldInst. unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - + const Instruction *TheUser = LI->use_back(); while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. @@ -750,10 +761,15 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, // If there are multiple or no uses of this instruction, then bail out. if (!TheUser->hasOneUse()) return false; - + TheUser = TheUser->use_back(); } - + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; @@ -802,6 +818,7 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. !isa<TerminatorInst>(I) && // Terminators aren't folded. !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. 
} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2626ac3..907d8d9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -317,7 +317,7 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and-xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; @@ -609,6 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; @@ -617,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -914,7 +916,8 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { } -MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { +EVT TargetLowering::getSetCCResultType(EVT VT) const { + assert(!VT.isVector() && "No default SetCC type for vectors!"); return PointerTy.SimpleTy; } @@ -996,7 +999,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, +void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, SmallVectorImpl<ISD::OutputArg> &Outs, const TargetLowering &TLI, SmallVectorImpl<uint64_t> *Offsets) { @@ -1054,7 +1057,7 @@ void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. -unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { +unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const { return TD->getCallFrameTypeAlignment(Ty); } @@ -1764,17 +1767,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - // Demand all the bits of the input that are demanded in the output. - // The low bits are obvious; the high bits are demanded because we're - // asserting that they're zero here. - if (SimplifyDemandedBits(Op.getOperand(0), NewMask, + // AssertZext demands all of the high bits, plus any of the low bits + // demanded by its users. 
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); KnownZero |= ~InMask & NewMask; break; } @@ -2191,7 +2193,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents() == ZeroOrOneBooleanContent)) { + getBooleanContents(false) == ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -2758,16 +2760,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. - bool isLegal = false; - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (isTypeLegal(*I)) { - isLegal = true; - break; - } - } - - if (!isLegal) continue; + if (!isLegalRC(RC)) + continue; for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { @@ -2840,7 +2834,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); - if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); @@ -2857,16 +2851,16 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } if (OpInfo.CallOperandVal) { - const llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); if (OpInfo.isIndirect) { - const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. - if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (StructType *STy = dyn_cast<StructType>(OpTy)) if (STy->getNumElements() == 1) OpTy = STy->getElementType(0); @@ -3187,7 +3181,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. 
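The getRegForInlineAsmConstraint hunk above replaces the open-coded scan over RC->vt_begin()..vt_end() with a single isLegalRC(RC) query; presumably the helper simply hoists the same "is any value type of this register class legal?" loop. A hedged sketch of the equivalent check, with illustrative types standing in for the real TargetLowering and TargetRegisterClass interfaces:

#include <algorithm>
#include <vector>

// Illustrative stand-ins only; the real code walks the class's MVTs and
// asks TargetLowering::isTypeLegal for each one.
enum SimpleVT { i32, i64, f32, v4i32 };

struct RegClass {
  std::vector<SimpleVT> ValueTypes; // value types this class can hold
};

struct Lowering {
  std::vector<SimpleVT> LegalTypes; // types the target handles natively

  bool isTypeLegal(SimpleVT VT) const {
    return std::find(LegalTypes.begin(), LegalTypes.end(), VT)
           != LegalTypes.end();
  }

  // A register class is usable for inline asm if at least one of its value
  // types is legal; e.g. a 64-bit-only class is rejected on a 32-bit target.
  bool isLegalRC(const RegClass &RC) const {
    for (size_t i = 0, e = RC.ValueTypes.size(); i != e; ++i)
      if (isTypeLegal(RC.ValueTypes[i]))
        return true;
    return false;
  }
};

int main() {
  Lowering TLI;
  TLI.LegalTypes.push_back(i32);     // pretend this is a 32-bit target
  RegClass GR32; GR32.ValueTypes.push_back(i32);
  RegClass GR64; GR64.ValueTypes.push_back(i64);
  return (TLI.isLegalRC(GR32) && !TLI.isLegalRC(GR64)) ? 0 : 1;
}

With that shape the caller collapses to the single early-continue shown in the hunk: if (!isLegalRC(RC)) continue;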
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp index 5a253a4..2609256 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp @@ -61,7 +61,7 @@ namespace { private: bool IsNullValue(Value *V); Constant *GetFrameMap(Function &F); - const Type* GetConcreteStackEntryType(Function &F); + Type* GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, @@ -109,13 +109,15 @@ namespace { State = 1; case 1: - // Find all 'return' and 'unwind' instructions. + // Find all 'return', 'resume', and 'unwind' instructions. while (StateBB != StateE) { BasicBlock *CurBB = StateBB++; - // Branches and invokes do not escape, only unwind and return do. + // Branches and invokes do not escape, only unwind, resume, and return + // do. TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI)) + if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) && + !isa<ResumeInst>(TI)) continue; Builder.SetInsertPoint(TI->getParent(), TI); @@ -139,9 +141,19 @@ namespace { return 0; // Create a cleanup block. - BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(), - CleanupBBName, &F); - UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB); + LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), + Type::getInt32Ty(C), NULL); + Constant *PersFn = + F.getParent()-> + getOrInsertFunction("__gcc_personality_v0", + FunctionType::get(Type::getInt32Ty(C), true)); + LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1, + "cleanup.lpad", + CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); // Transform the 'call' instructions into 'invoke's branching to the // cleanup block. Go in reverse order to make prettier BB names. @@ -172,7 +184,7 @@ namespace { delete CI; } - Builder.SetInsertPoint(UI->getParent(), UI); + Builder.SetInsertPoint(RI->getParent(), RI); return &Builder; } } @@ -190,7 +202,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { Constant *ShadowStackGC::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. - const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); // Truncate the ShadowStackDescriptor if some metadata is null. 
unsigned NumMeta = 0; @@ -203,7 +215,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { } Metadata.resize(NumMeta); - const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Type *Int32Ty = Type::getInt32Ty(F.getContext()); Constant *BaseElts[] = { ConstantInt::get(Int32Ty, Roots.size(), false), @@ -216,7 +228,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { }; Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; - StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys); + StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta)); Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); @@ -241,17 +253,17 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; - return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); + return ConstantExpr::getGetElementPtr(GV, GEPIndices); } -const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { +Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. std::vector<Type*> EltTys; EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys); + return StructType::create(EltTys, "gc_stackentry."+F.getName().str()); } /// doInitialization - If this module uses the GC intrinsics, find them now. If @@ -267,7 +279,7 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { EltTys.push_back(Type::getInt32Ty(M.getContext())); // Specifies length of variable length array. EltTys.push_back(Type::getInt32Ty(M.getContext())); - FrameMapTy = StructType::createNamed("gc_map", EltTys); + FrameMapTy = StructType::create(EltTys, "gc_map"); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); // struct StackEntry { @@ -276,13 +288,13 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry"); + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); EltTys.clear(); EltTys.push_back(PointerType::getUnqual(StackEntryTy)); EltTys.push_back(FrameMapPtrTy); StackEntryTy->setBody(EltTys); - const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); // Get the root chain if it already exists. 
Head = M.getGlobalVariable("llvm_gc_root_chain"); @@ -340,7 +352,7 @@ ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx), ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; - Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); + Value* Val = B.CreateGEP(BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -352,7 +364,7 @@ ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, const char *Name) { Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx) }; - Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); + Value *Val = B.CreateGEP(BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -373,7 +385,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) { // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); - const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); // Build the shadow stack entry at the very start of the function. BasicBlock::iterator IP = F.getEntryBlock().begin(); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 65a33da..ded2459d 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -21,26 +21,31 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; +static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden, + cl::desc("Disable the old SjLj EH preparation pass")); + STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPass : public FunctionPass { - const TargetLowering *TLI; - - const Type *FunctionContextTy; + Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; Constant *BuiltinSetjmpFn; @@ -53,8 +58,9 @@ namespace { Constant *ExceptionFn; Constant *CallSiteFn; Constant *DispatchSetupFn; - + Constant *FuncCtxFn; Value *CallSite; + DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap; public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPass(const TargetLowering *tli = NULL) @@ -62,16 +68,22 @@ namespace { bool doInitialization(Module &M); bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} const char *getPassName() const { return "SJLJ Exception Handling preparation"; } private: + bool setupEntryBlockAndCallSites(Function &F); + Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); + void 
lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); + void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); + void splitLandingPad(InvokeInst *II); bool insertSjLjEHSupport(Function &F); }; } // end anonymous namespace @@ -116,6 +128,7 @@ bool SjLjEHPass::doInitialization(Module &M) { CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); DispatchSetupFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); + FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); PersonalityFn = 0; return true; @@ -131,6 +144,42 @@ void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, new StoreInst(CallSiteNoC, CallSite, true, I); // volatile } +/// splitLandingPad - Split a landing pad. This takes considerable care because +/// of PHIs and other nasties. The problem is that the jump table needs to jump +/// to the landing pad block. However, the landing pad block can be jumped to +/// only by an invoke instruction. So we clone the landingpad instruction into +/// its own basic block, have the invoke jump to there. The landingpad +/// instruction's basic block's successor is now the target for the jump table. +/// +/// But because of PHI nodes, we need to create another basic block for the jump +/// table to jump to. This is definitely a hack, because the values for the PHI +/// nodes may not be defined on the edge from the jump table. But that's okay, +/// because the jump table is simply a construct to mimic what is happening in +/// the CFG. So the values are mysteriously there, even though there is no value +/// for the PHI from the jump table's edge (hence calling this a hack). +void SjLjEHPass::splitLandingPad(InvokeInst *II) { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), + ".1", ".2", this, NewBBs); + + // Create an empty block so that the jump table has something to jump to + // which doesn't have any PHI nodes. + BasicBlock *LPad = NewBBs[0]; + BasicBlock *Succ = *succ_begin(LPad); + BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land", + LPad->getParent(), Succ); + LPad->getTerminator()->eraseFromParent(); + BranchInst::Create(JumpTo, LPad); + BranchInst::Create(Succ, JumpTo); + LPadSuccMap[II] = JumpTo; + + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + Value *Val = PN->removeIncomingValue(LPad, false); + PN->addIncoming(Val, JumpTo); + } +} + /// markInvokeCallSite - Insert code to mark the call_site for this invoke void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, @@ -140,11 +189,15 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, // The runtime comes back to the dispatcher with the call_site - 1 in // the context. Odd, but there it is. ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo - 1); + InvokeNo - 1); // If the unwind edge has phi nodes, split the edge. if (isa<PHINode>(II->getUnwindDest()->begin())) { - SplitCriticalEdge(II, 1, this); + // FIXME: New EH - This if-condition will be always true in the new scheme. 
+ if (II->getUnwindDest()->isLandingPad()) + splitLandingPad(II); + else + SplitCriticalEdge(II, 1, this); // If there are any phi nodes left, they must have a single predecessor. while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { @@ -161,7 +214,12 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. - CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); + if (BasicBlock *SuccBB = LPadSuccMap[II]) { + CatchSwitch->addCase(SwitchValC, SuccBB); + } else { + CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); + } + // We still want this to look like an invoke so we emit the LSDA properly, // so we don't transform the invoke into a call here. } @@ -187,10 +245,16 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); - SplitCriticalEdge(II, 1, this); + + // FIXME: New EH - This if-condition will be always true in the new scheme. + if (II->getUnwindDest()->isLandingPad()) + splitLandingPad(II); + else + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && - "critical edge splitting left single entry phi nodes?"); + "Critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); @@ -204,7 +268,7 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - const Type *Ty = AI->getType(); + Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. @@ -283,9 +347,8 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) NeedsSpill = true; - } } // If we decided we need a spill, do it. @@ -299,6 +362,44 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { } } +/// CreateLandingPadLoad - Load the exception handling values and insert them +/// into a structure. +static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr, + Value *SelAddr, + BasicBlock::iterator InsertPt) { + Value *Exn = new LoadInst(ExnAddr, "exn", false, + InsertPt); + Type *Ty = Type::getInt8PtrTy(F.getContext()); + Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt); + Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt); + + Ty = StructType::get(Exn->getType(), Sel->getType(), NULL); + InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty), + Exn, 0, + "lpad.val", InsertPt); + return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt); +} + +/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a +/// load from the stored values (via CreateLandingPadLoad). This looks through +/// PHI nodes, and removes them if they are dead. 
+static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr, + Value *SelAddr) { + if (Inst->use_empty()) return; + + while (!Inst->use_empty()) { + Instruction *I = cast<Instruction>(Inst->use_back()); + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr); + if (PN->use_empty()) PN->eraseFromParent(); + continue; + } + + I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I)); + } +} + bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<ReturnInst*,16> Returns; SmallVector<UnwindInst*,16> Unwinds; @@ -337,10 +438,23 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<CallInst*,16> EH_Exceptions; SmallVector<Instruction*,16> JmpbufUpdatePoints; - // Note: Skip the entry block since there's nothing there that interests - // us. eh.selector and eh.exception shouldn't ever be there, and we - // want to disregard any allocas that are there. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + // Note: Skip the entry block since there's nothing there that interests + // us. eh.selector and eh.exception shouldn't ever be there, and we + // want to disregard any allocas that are there. + // + // FIXME: This is awkward. The new EH scheme won't need to skip the entry + // block. + if (BB == F.begin()) { + if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) { + // FIXME: This will be always non-NULL in the new EH. + if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) + if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); + } + + continue; + } + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->getCalledFunction() == SelectorFn) { @@ -353,6 +467,10 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { JmpbufUpdatePoints.push_back(AI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) { + // FIXME: This will be always non-NULL in the new EH. + if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) + if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); } } } @@ -371,6 +489,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // invoke's. splitLiveRangesAcrossInvokes(Invokes); + + SmallVector<LandingPadInst*, 16> LandingPads; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) + // FIXME: This will be always non-NULL in the new EH. + if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) + LandingPads.push_back(LPI); + } + + BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. 
This is an @@ -381,27 +509,25 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { "fcn_context", F.begin()->begin()); Value *Idxs[2]; - const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *Zero = ConstantInt::get(Int32Ty, 0); // We need to also keep around a reference to the call_site field Idxs[0] = Zero; Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "call_site", + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site", EntryBB->getTerminator()); // The exception selector comes back in context->data[1] Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "fc_data", + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data", EntryBB->getTerminator()); Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, "exc_selector_gep", EntryBB->getTerminator()); // The exception value comes back in context->data[0] Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, "exception_gep", EntryBB->getTerminator()); @@ -423,13 +549,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // instruction hasn't already been removed. if (!I->getParent()) continue; Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Type *Ty = Type::getInt8PtrTy(F.getContext()); Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); I->replaceAllUsesWith(Val); I->eraseFromParent(); } + for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) + ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr); + // The entry block changes to have the eh.sjlj.setjmp, with a conditional // branch to a dispatch block for non-zero returns. If we return normally, // we're not handling an exception and just register the function context and @@ -466,8 +595,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Idxs[0] = Zero; Idxs[1] = ConstantInt::get(Int32Ty, 4); Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", + GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", EntryBB->getTerminator()); Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", EntryBB->getTerminator()); @@ -475,8 +603,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Idxs[1] = ConstantInt::get(Int32Ty, 3); Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", + GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", EntryBB->getTerminator()); new StoreInst(PersonalityFn, PersonalityFieldPtr, true, EntryBB->getTerminator()); @@ -484,12 +611,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Save the frame pointer. 
Idxs[1] = ConstantInt::get(Int32Ty, 5); Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "jbuf_gep", + = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep", EntryBB->getTerminator()); Idxs[1] = ConstantInt::get(Int32Ty, 0); Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", + GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", EntryBB->getTerminator()); Value *Val = CallInst::Create(FrameAddrFn, @@ -501,7 +627,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Save the stack pointer. Idxs[1] = ConstantInt::get(Int32Ty, 2); Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", + GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", EntryBB->getTerminator()); Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); @@ -513,7 +639,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Type::getInt8PtrTy(F.getContext()), "", EntryBB->getTerminator()); Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "dispatch", + "", EntryBB->getTerminator()); // Add a call to dispatch_setup after the setjmp call. This is expanded to any @@ -554,6 +680,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { if (Callee != SelectorFn && Callee != ExceptionFn && !CI->doesNotThrow()) insertCallSiteStore(CI, -1, CallSite); + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { + insertCallSiteStore(RI, -1, CallSite); } } @@ -582,7 +710,317 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { return true; } +/// setupFunctionContext - Allocate the function context on the stack and fill +/// it with all of the data that we know at this point. +Value *SjLjEHPass:: +setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an alloca + // because the value needs to be added to the global context list. + unsigned Align = + TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); + AllocaInst *FuncCtx = + new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + + // Fill in the function context structure. + Value *Idxs[2]; + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + Value *One = ConstantInt::get(Int32Ty, 1); + + // Keep around a reference to the call_site field. + Idxs[0] = Zero; + Idxs[1] = One; + CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site", + EntryBB->getTerminator()); + + // Reference the __data field. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data", + EntryBB->getTerminator()); + + // The exception value comes back in context->__data[0]. + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, + "exception_gep", + EntryBB->getTerminator()); + + // The exception selector comes back in context->__data[1]. 
+ Idxs[1] = One; + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, + "exn_selector_gep", + EntryBB->getTerminator()); + + for (unsigned I = 0, E = LPads.size(); I != E; ++I) { + LandingPadInst *LPI = LPads[I]; + IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + + Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); + ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); + + Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + + LPI->replaceAllUsesWith(LPadVal); + } + + // Personality function + Idxs[1] = ConstantInt::get(Int32Ty, 3); + if (!PersonalityFn) + PersonalityFn = LPads[0]->getPersonalityFn(); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // LSDA address + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + return FuncCtx; +} + +/// lowerIncomingArguments - To avoid having to handle incoming arguments +/// specially, we lower each arg to a copy instruction in the entry block. This +/// ensures that the argument value itself cannot be live out of the entry +/// block. +void SjLjEHPass::lowerIncomingArguments(Function &F) { + BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) + ++AfterAllocaInsPt; + + for (Function::arg_iterator + AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + Type *Ty = AI->getType(); + + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = + new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); + AI->replaceAllUsesWith(NC); + + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with. We do + // this because the above replaceAllUsesWith() clobbered the operand, but + // we want this one to remain. + NC->setOperand(0, AI); + } + } +} + +/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind +/// edge and spill them. 
+void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator + BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator + II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*, 16> Users; + for (Value::use_iterator + UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } + } +} + +/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling +/// the function context and marking the call sites with the appropriate +/// values. These values are used by the DWARF EH emitter. +bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { + SmallVector<ReturnInst*, 16> Returns; + SmallVector<InvokeInst*, 16> Invokes; + SmallVector<LandingPadInst*, 16> LPads; + + // Look through the terminators of the basic blocks to find invokes. 
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + LPads.push_back(II->getUnwindDest()->getLandingPadInst()); + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + Returns.push_back(RI); + } + + if (Invokes.empty()) return false; + + lowerIncomingArguments(F); + lowerAcrossUnwindEdges(F, Invokes); + + Value *FuncCtx = setupFunctionContext(F, LPads); + BasicBlock *EntryBB = F.begin(); + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Value *Idxs[2] = { + ConstantInt::get(Int32Ty, 0), 0 + }; + + // Get a reference to the jump buffer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", + EntryBB->getTerminator()); + new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + + // Save the stack pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", + EntryBB->getTerminator()); + + Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); + new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + + // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. + Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator()); + + // Store a pointer to the function context so that the back-end will know + // where to look for it. + Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator()); + + // At this point, we are all set up, update the invoke instructions to mark + // their call_site values. + for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { + insertCallSiteStore(Invokes[I], I + 1, CallSite); + + ConstantInt *CallSiteNum = + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + + // Record the call site value for the back end so it stays associated with + // the invoke. + CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); + } + + // Mark call instructions that aren't nounwind as no-action (call_site == + // -1). Skip the entry block, as prior to then, no function context has been + // created for this function and any unexpected exceptions thrown will go + // directly to the caller's context, which is what we want anyway, so no need + // to do anything here. 
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (!CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { + insertCallSiteStore(RI, -1, CallSite); + } + + // Register the function context and make sure it's known to not throw + CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", + EntryBB->getTerminator()); + Register->setDoesNotThrow(); + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned I = 0, E = Returns.size(); I != E; ++I) + CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); + + return true; +} + bool SjLjEHPass::runOnFunction(Function &F) { - bool Res = insertSjLjEHSupport(F); + bool Res = false; + if (!DisableOldSjLjEH) + Res = insertSjLjEHSupport(F); + else + Res = setupEntryBlockAndCallSites(F); return Res; } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index 6949618..6f33f54 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -220,6 +220,7 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { 0, // DontCare, 1, // PrefReg, -1, // PrefSpill + 0, // PrefBoth -HUGE_VALF // MustSpill }; @@ -239,6 +240,22 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { } } +/// addPrefSpill - Same as addConstraints(PrefSpill) +void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + float Freq = getBlockFrequency(*I); + if (Strong) + Freq += Freq; + unsigned ib = bundles->getBundle(*I, 0); + unsigned ob = bundles->getBundle(*I, 1); + activate(ib); + activate(ob); + nodes[ib].addBias(-Freq, 1); + nodes[ob].addBias(-Freq, 0); + } +} + void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index 6952ad8..fc412f8 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -71,6 +71,7 @@ public: DontCare, ///< Block doesn't care / variable not live. PrefReg, ///< Block entry/exit prefers a register. PrefSpill, ///< Block entry/exit prefers a stack slot. + PrefBoth, ///< Block entry prefers both register and stack. MustSpill ///< A register is impossible, variable must be spilled. }; @@ -79,6 +80,11 @@ public: unsigned Number; ///< Basic block number (from MBB::getNumber()). BorderConstraint Entry : 8; ///< Constraint on block entry. BorderConstraint Exit : 8; ///< Constraint on block exit. + + /// True when this block changes the value of the live range. This means + /// the block has a non-PHI def. When this is false, a live-in value on + /// the stack can be live-out on the stack without inserting a spill. + bool ChangesValue; }; /// prepare - Reset state and prepare for a new spill placement computation. @@ -96,6 +102,14 @@ public: /// live out. void addConstraints(ArrayRef<BlockConstraint> LiveBlocks); + /// addPrefSpill - Add PrefSpill constraints to all blocks listed. 
This is + /// equivalent to calling addConstraint with identical BlockConstraints with + /// Entry = Exit = PrefSpill, and ChangesValue = false. + /// + /// @param Blocks Array of block numbers that prefer to spill in and out. + /// @param Strong When true, double the negative bias for these blocks. + void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong); + /// addLinks - Add transparent blocks with the given numbers. void addLinks(ArrayRef<unsigned> Links); diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 761cab7..6362780 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -178,45 +179,55 @@ bool SplitAnalysis::calcLiveBlockInfo() { return false; } else { // This block has uses. Find the first and last uses in the block. - BI.FirstUse = *UseI; - assert(BI.FirstUse >= Start); + BI.FirstInstr = *UseI; + assert(BI.FirstInstr >= Start); do ++UseI; while (UseI != UseE && *UseI < Stop); - BI.LastUse = UseI[-1]; - assert(BI.LastUse < Stop); + BI.LastInstr = UseI[-1]; + assert(BI.LastInstr < Stop); // LVI is the first live segment overlapping MBB. BI.LiveIn = LVI->start <= Start; + // When not live in, the first use should be a def. + if (!BI.LiveIn) { + assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + assert(LVI->start == BI.FirstInstr && "First instr should be a def"); + BI.FirstDef = BI.FirstInstr; + } + // Look for gaps in the live range. BI.LiveOut = true; while (LVI->end < Stop) { SlotIndex LastStop = LVI->end; if (++LVI == LVE || LVI->start >= Stop) { BI.LiveOut = false; - BI.LastUse = LastStop; + BI.LastInstr = LastStop; break; } + if (LastStop < LVI->start) { // There is a gap in the live range. Create duplicate entries for the // live-in snippet and the live-out snippet. ++NumGapBlocks; // Push the Live-in part. - BI.LiveThrough = false; BI.LiveOut = false; UseBlocks.push_back(BI); - UseBlocks.back().LastUse = LastStop; + UseBlocks.back().LastInstr = LastStop; // Set up BI for the live-out part. BI.LiveIn = false; BI.LiveOut = true; - BI.FirstUse = LVI->start; + BI.FirstInstr = BI.FirstDef = LVI->start; } + + // A LiveRange that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + if (!BI.FirstDef) + BI.FirstDef = LVI->start; } - // Don't set LiveThrough when the block has a gap. - BI.LiveThrough = BI.LiveIn && BI.LiveOut; UseBlocks.push_back(BI); // LVI is now at LVE or LVI->end >= Stop. @@ -299,17 +310,21 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), Edit(0), OpenIdx(0), + SpillMode(SM_Partition), RegAssign(Allocator) {} -void SplitEditor::reset(LiveRangeEdit &lre) { - Edit = &lre; +void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { + Edit = &LRE; + SpillMode = SM; OpenIdx = 0; RegAssign.clear(); Values.clear(); - // We don't need to clear LiveOutCache, only LiveOutSeen entries are read. - LiveOutSeen.clear(); + // Reset the LiveRangeCalc instances needed for this spill mode. 
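The SpillPlacement::addPrefSpill() hunk above is documented as shorthand for addConstraints() with PrefSpill on both entry and exit. A minimal caller-side sketch of that equivalence, built only from the BlockConstraint fields and signatures shown in this diff (the helper itself is illustrative, not part of the patch):

  #include "SpillPlacement.h"          // local header in lib/CodeGen
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // Long-hand equivalent of SP.addPrefSpill(Blocks, /*Strong=*/false).
  static void addPrefSpillLongHand(SpillPlacement &SP,
                                   ArrayRef<unsigned> Blocks) {
    SmallVector<SpillPlacement::BlockConstraint, 8> BCs;
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      SpillPlacement::BlockConstraint BC;
      BC.Number = Blocks[i];
      BC.Entry = SpillPlacement::PrefSpill;   // prefer a stack slot on entry
      BC.Exit = SpillPlacement::PrefSpill;    // ... and on exit
      BC.ChangesValue = false;                // no def inside the block
      BCs.push_back(BC);
    }
    SP.addConstraints(BCs);
  }

The Strong form has no addConstraints() equivalent: as the implementation above shows, it simply doubles each block's frequency before applying the negative bias.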
+ LRCalc[0].reset(&VRM.getMachineFunction()); + if (SpillMode) + LRCalc[1].reset(&VRM.getMachineFunction()); // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. @@ -340,7 +355,8 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator, bool> InsP = - Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI)); + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), + ValueForcePair(VNI, false))); // This was the first time (RegIdx, ParentVNI) was mapped. // Keep it as a simple def without any liveness. @@ -348,11 +364,11 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, return VNI; // If the previous value was a simple mapping, add liveness for it now. - if (VNInfo *OldVNI = InsP.first->second) { + if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); - // No longer a simple mapping. - InsP.first->second = 0; + // No longer a simple mapping. Switch to a complex, non-forced mapping. + InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI @@ -362,230 +378,24 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, return VNI; } -void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) { +void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { assert(ParentVNI && "Mapping NULL value"); - VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)]; + ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)]; + VNInfo *VNI = VFP.getPointer(); - // ParentVNI was either unmapped or already complex mapped. Either way. - if (!VNI) + // ParentVNI was either unmapped or already complex mapped. Either way, just + // set the force bit. + if (!VNI) { + VFP.setInt(true); return; + } // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); - VNI = 0; -} - -// extendRange - Extend the live range to reach Idx. -// Potentially create phi-def values. -void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { - assert(Idx.isValid() && "Invalid SlotIndex"); - MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); - assert(IdxMBB && "No MBB at Idx"); - LiveInterval *LI = Edit->get(RegIdx); - - // Is there a def in the same MBB we can extend? - if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx)) - return; - - // Now for the fun part. We know that ParentVNI potentially has multiple defs, - // and we may need to create even more phi-defs to preserve VNInfo SSA form. - // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. - VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); - - // When there were multiple different values, we may need new PHIs. - if (!VNI) - return updateSSA(); - - // Poor man's SSA update for the single-value case. 
- LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), - E = LiveInBlocks.end(); I != E; ++I) { - MachineBasicBlock *MBB = I->DomNode->getBlock(); - SlotIndex Start = LIS.getMBBStartIdx(MBB); - if (I->Kill.isValid()) - LI->addRange(LiveRange(Start, I->Kill, VNI)); - else { - LiveOutCache[MBB] = LOP; - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - } - } -} - -/// findReachingDefs - Search the CFG for known live-out values. -/// Add required live-in blocks to LiveInBlocks. -VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill) { - // Initialize the live-out cache the first time it is needed. - if (LiveOutSeen.empty()) { - unsigned N = VRM.getMachineFunction().getNumBlockIDs(); - LiveOutSeen.resize(N); - LiveOutCache.resize(N); - } - - // Blocks where LI should be live-in. - SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); - - // Remember if we have seen more than one value. - bool UniqueVNI = true; - VNInfo *TheVNI = 0; - - // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != WorkList.size(); ++i) { - MachineBasicBlock *MBB = WorkList[i]; - assert(!MBB->pred_empty() && "Value live-in to entry block?"); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - LiveOutPair &LOP = LiveOutCache[Pred]; - - // Is this a known live-out block? - if (LiveOutSeen.test(Pred->getNumber())) { - if (VNInfo *VNI = LOP.first) { - if (TheVNI && TheVNI != VNI) - UniqueVNI = false; - TheVNI = VNI; - } - continue; - } - - // First time. LOP is garbage and must be cleared below. - LiveOutSeen.set(Pred->getNumber()); - - // Does Pred provide a live-out value? - SlotIndex Start, Last; - tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred); - Last = Last.getPrevSlot(); - VNInfo *VNI = LI->extendInBlock(Start, Last); - LOP.first = VNI; - if (VNI) { - LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; - if (TheVNI && TheVNI != VNI) - UniqueVNI = false; - TheVNI = VNI; - continue; - } - LOP.second = 0; - - // No, we need a live-in value for Pred as well - if (Pred != KillMBB) - WorkList.push_back(Pred); - else - // Loopback to KillMBB, so value is really live through. - Kill = SlotIndex(); - } - } - - // Transfer WorkList to LiveInBlocks in reverse order. - // This ordering works best with updateSSA(). - LiveInBlocks.clear(); - LiveInBlocks.reserve(WorkList.size()); - while(!WorkList.empty()) - LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); - - // The kill block may not be live-through. - assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); - LiveInBlocks.back().Kill = Kill; - - return UniqueVNI ? TheVNI : 0; -} - -void SplitEditor::updateSSA() { - // This is essentially the same iterative algorithm that SSAUpdater uses, - // except we already have a dominator tree, so we don't have to recompute it. - unsigned Changes; - do { - Changes = 0; - // Propagate live-out values down the dominator tree, inserting phi-defs - // when necessary. - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), - E = LiveInBlocks.end(); I != E; ++I) { - MachineDomTreeNode *Node = I->DomNode; - // Skip block if the live-in value has already been determined. 
- if (!Node) - continue; - MachineBasicBlock *MBB = Node->getBlock(); - MachineDomTreeNode *IDom = Node->getIDom(); - LiveOutPair IDomValue; - - // We need a live-in value to a block with no immediate dominator? - // This is probably an unreachable block that has survived somehow. - bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - - // IDom dominates all of our predecessors, but it may not be their - // immediate dominator. Check if any of them have live-out values that are - // properly dominated by IDom. If so, we need a phi-def here. - if (!needPHI) { - IDomValue = LiveOutCache[IDom->getBlock()]; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - LiveOutPair Value = LiveOutCache[*PI]; - if (!Value.first || Value.first == IDomValue.first) - continue; - // This predecessor is carrying something other than IDomValue. - // It could be because IDomValue hasn't propagated yet, or it could be - // because MBB is in the dominance frontier of that value. - if (MDT.dominates(IDom, Value.second)) { - needPHI = true; - break; - } - } - } - - // The value may be live-through even if Kill is set, as can happen when - // we are called from extendRange. In that case LiveOutSeen is true, and - // LiveOutCache indicates a foreign or missing value. - LiveOutPair &LOP = LiveOutCache[MBB]; - - // Create a phi-def if required. - if (needPHI) { - ++Changes; - SlotIndex Start = LIS.getMBBStartIdx(MBB); - unsigned RegIdx = RegAssign.lookup(Start); - LiveInterval *LI = Edit->get(RegIdx); - VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - I->Value = VNI; - // This block is done, we know the final value. - I->DomNode = 0; - if (I->Kill.isValid()) - LI->addRange(LiveRange(Start, I->Kill, VNI)); - else { - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - LOP = LiveOutPair(VNI, Node); - } - } else if (IDomValue.first) { - // No phi-def here. Remember incoming value. - I->Value = IDomValue.first; - if (I->Kill.isValid()) - continue; - // Propagate IDomValue if needed: - // MBB is live-out and doesn't define its own value. - if (LOP.second != Node && LOP.first != IDomValue.first) { - ++Changes; - LOP = IDomValue; - } - } - } - } while (Changes); - - // The values in LiveInBlocks are now accurate. No more phi-defs are needed - // for these blocks, so we can color the live ranges. - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), - E = LiveInBlocks.end(); I != E; ++I) { - if (!I->DomNode) - continue; - assert(I->Value && "No live-in value found"); - MachineBasicBlock *MBB = I->DomNode->getBlock(); - SlotIndex Start = LIS.getMBBStartIdx(MBB); - unsigned RegIdx = RegAssign.lookup(Start); - LiveInterval *LI = Edit->get(RegIdx); - LI->addRange(LiveRange(Start, I->Kill.isValid() ? - I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); - } + // Mark as complex mapped, forced. + VFP = ValueForcePair(0, true); } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -710,17 +520,28 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { DEBUG(dbgs() << " leaveIntvAfter " << Idx); // The interval must be live beyond the instruction at Idx. 
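The extendRange()/findReachingDefs()/updateSSA() bodies removed above now live in the separate LiveRangeCalc helper, whose header is added elsewhere in this patch set but not shown in this excerpt. A rough sketch of how SplitEditor drives it, using only the entry points that appear in the surrounding hunks; the signatures are inferred from those call sites and the two wrapper functions are illustrative glue, not patch code:

  #include "LiveRangeCalc.h"
  #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  #include "llvm/CodeGen/MachineDominators.h"
  using namespace llvm;

  // DefMBB has a known live-out value; UseNode's block still needs LI
  // live-in up to Kill. One calculateValues() call then performs the SSA
  // update that updateSSA() used to do in place.
  static void sketchSSAUpdate(LiveRangeCalc &LRC, LiveInterval *LI,
                              MachineBasicBlock *DefMBB, VNInfo *LiveOutVNI,
                              MachineDomTreeNode *UseNode, SlotIndex Kill,
                              LiveIntervals &LIS, MachineDominatorTree &MDT) {
    LRC.setLiveOutValue(DefMBB, LiveOutVNI);
    LRC.addLiveInBlock(LI, UseNode, Kill);
    LRC.calculateValues(LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator());
  }

  // Reaching a single use replaces the old extendRange(RegIdx, Idx).
  static void sketchExtend(LiveRangeCalc &LRC, LiveInterval *LI,
                           SlotIndex UseIdx, LiveIntervals &LIS,
                           MachineDominatorTree &MDT) {
    LRC.extend(LI, UseIdx.getNextSlot(), LIS.getSlotIndexes(), &MDT,
               &LIS.getVNInfoAllocator());
  }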
- Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + SlotIndex Boundary = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); - return Idx.getNextSlot(); + return Boundary.getNextSlot(); } DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); - - MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + MachineInstr *MI = LIS.getInstructionFromIndex(Boundary); assert(MI && "No instruction at index"); - VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), + + // In spill mode, make live ranges as short as possible by inserting the copy + // before MI. This is only possible if that instruction doesn't redefine the + // value. The inserted COPY is not a kill, and we don't need to recompute + // the source live range. The spiller also won't try to hoist this copy. + if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) && + MI->readsVirtualRegister(Edit->getReg())) { + forceRecompute(0, ParentVNI); + defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return Idx; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(), llvm::next(MachineBasicBlock::iterator(MI))); return VNI->def; } @@ -730,7 +551,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { DEBUG(dbgs() << " leaveIntvBefore " << Idx); // The interval must be live into the instruction at Idx. - Idx = Idx.getBoundaryIndex(); + Idx = Idx.getBaseIndex(); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); @@ -770,19 +591,219 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); - // The complement interval will be extended as needed by extendRange(). + // The complement interval will be extended as needed by LRCalc.extend(). if (ParentVNI) - markComplexMapped(0, ParentVNI); + forceRecompute(0, ParentVNI); DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); DEBUG(dump()); } +//===----------------------------------------------------------------------===// +// Spill modes +//===----------------------------------------------------------------------===// + +void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { + LiveInterval *LI = Edit->get(0); + DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); + RegAssignMap::iterator AssignI; + AssignI.setMap(RegAssign); + + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + VNInfo *VNI = Copies[i]; + SlotIndex Def = VNI->def; + MachineInstr *MI = LIS.getInstructionFromIndex(Def); + assert(MI && "No instruction for back-copy"); + + MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + bool AtBegin; + do AtBegin = MBBI == MBB->begin(); + while (!AtBegin && (--MBBI)->isDebugValue()); + + DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); + LI->removeValNo(VNI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + + // Adjust RegAssign if a register assignment is killed at VNI->def. We + // want to avoid calculating the live range of the source register if + // possible. + AssignI.find(VNI->def.getPrevSlot()); + if (!AssignI.valid() || AssignI.start() >= Def) + continue; + // If MI doesn't kill the assigned register, just leave it. 
+ if (AssignI.stop() != Def) + continue; + unsigned RegIdx = AssignI.value(); + if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { + DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); + forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); + } else { + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); + AssignI.setStop(Kill); + } + } +} + +MachineBasicBlock* +SplitEditor::findShallowDominator(MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB) { + if (MBB == DefMBB) + return MBB; + assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def."); + + const MachineLoopInfo &Loops = SA.Loops; + const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB); + MachineDomTreeNode *DefDomNode = MDT[DefMBB]; + + // Best candidate so far. + MachineBasicBlock *BestMBB = MBB; + unsigned BestDepth = UINT_MAX; + + for (;;) { + const MachineLoop *Loop = Loops.getLoopFor(MBB); + + // MBB isn't in a loop, it doesn't get any better. All dominators have a + // higher frequency by definition. + if (!Loop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth 0\n"); + return MBB; + } + + // We'll never be able to exit the DefLoop. + if (Loop == DefLoop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " in the same loop\n"); + return MBB; + } + + // Least busy dominator seen so far. + unsigned Depth = Loop->getLoopDepth(); + if (Depth < BestDepth) { + BestMBB = MBB; + BestDepth = Depth; + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth " << Depth << '\n'); + } + + // Leave loop by going to the immediate dominator of the loop header. + // This is a bigger stride than simply walking up the dominator tree. + MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom(); + + // Too far up the dominator tree? + if (!IDom || !MDT.dominates(DefDomNode, IDom)) + return BestMBB; + + MBB = IDom->getBlock(); + } +} + +void SplitEditor::hoistCopiesForSize() { + // Get the complement interval, always RegIdx 0. + LiveInterval *LI = Edit->get(0); + LiveInterval *Parent = &Edit->getParent(); + + // Track the nearest common dominator for all back-copies for each ParentVNI, + // indexed by ParentVNI->id. + typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair; + SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums()); + + // Find the nearest common dominator for parent values with multiple + // back-copies. If a single back-copy dominates, put it in DomPair.second. + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo *VNI = *VI; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + assert(ParentVNI && "Parent not live at complement def"); + + // Don't hoist remats. The complement is probably going to disappear + // completely anyway. + if (Edit->didRematerialize(ParentVNI)) + continue; + + MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def); + DomPair &Dom = NearestDom[ParentVNI->id]; + + // Keep directly defined parent values. This is either a PHI or an + // instruction in the complement range. All other copies of ParentVNI + // should be eliminated. + if (VNI->def == ParentVNI->def) { + DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); + Dom = DomPair(ValMBB, VNI->def); + continue; + } + // Skip the singly mapped values. 
There is nothing to gain from hoisting a + // single back-copy. + if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) { + DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); + continue; + } + + if (!Dom.first) { + // First time we see ParentVNI. VNI dominates itself. + Dom = DomPair(ValMBB, VNI->def); + } else if (Dom.first == ValMBB) { + // Two defs in the same block. Pick the earlier def. + if (!Dom.second.isValid() || VNI->def < Dom.second) + Dom.second = VNI->def; + } else { + // Different basic blocks. Check if one dominates. + MachineBasicBlock *Near = + MDT.findNearestCommonDominator(Dom.first, ValMBB); + if (Near == ValMBB) + // Def ValMBB dominates. + Dom = DomPair(ValMBB, VNI->def); + else if (Near != Dom.first) + // None dominate. Hoist to common dominator, need new def. + Dom = DomPair(Near, SlotIndex()); + } + + DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def + << " for parent " << ParentVNI->id << '@' << ParentVNI->def + << " hoist to BB#" << Dom.first->getNumber() << ' ' + << Dom.second << '\n'); + } + + // Insert the hoisted copies. + for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { + DomPair &Dom = NearestDom[i]; + if (!Dom.first || Dom.second.isValid()) + continue; + // This value needs a hoisted copy inserted at the end of Dom.first. + VNInfo *ParentVNI = Parent->getValNumInfo(i); + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); + // Get a less loopy dominator than Dom.first. + Dom.first = findShallowDominator(Dom.first, DefMBB); + SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); + Dom.second = + defFromParent(0, ParentVNI, Last, *Dom.first, + LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def; + } + + // Remove redundant back-copies that are now known to be dominated by another + // def with the same value. + SmallVector<VNInfo*, 8> BackCopies; + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo *VNI = *VI; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + const DomPair &Dom = NearestDom[ParentVNI->id]; + if (!Dom.first || Dom.second == VNI->def) + continue; + BackCopies.push_back(VNI); + forceRecompute(0, ParentVNI); + } + removeBackCopies(BackCopies); +} + + /// transferValues - Transfer all possible values to the new live ranges. -/// Values that were rematerialized are left alone, they need extendRange(). +/// Values that were rematerialized are left alone, they need LRCalc.extend(). bool SplitEditor::transferValues() { bool Skipped = false; - LiveInBlocks.clear(); RegAssignMap::const_iterator AssignI = RegAssign.begin(); for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { @@ -812,28 +833,23 @@ bool SplitEditor::transferValues() { LiveInterval *LI = Edit->get(RegIdx); // Check for a simply defined value that can be blitted directly. - if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { + ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); + if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); LI->addRange(LiveRange(Start, End, VNI)); Start = End; continue; } - // Skip rematerialized values, we need to use extendRange() and - // extendPHIKillRanges() to completely recompute the live ranges. - if (Edit->didRematerialize(ParentVNI)) { - DEBUG(dbgs() << "(remat)"); + // Skip values with forced recomputation. 
+ if (VFP.getInt()) { + DEBUG(dbgs() << "(recalc)"); Skipped = true; Start = End; continue; } - // Initialize the live-out cache the first time it is needed. - if (LiveOutSeen.empty()) { - unsigned N = VRM.getMachineFunction().getNumBlockIDs(); - LiveOutSeen.resize(N); - LiveOutCache.resize(N); - } + LiveRangeCalc &LRC = getLRCalc(RegIdx); // This value has multiple defs in RegIdx, but it wasn't rematerialized, // so the live range is accurate. Add live-in blocks in [Start;End) to the @@ -844,15 +860,13 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LI->extendInBlock(BlockStart, - std::min(BlockEnd, End).getPrevSlot()); + VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? - if (BlockEnd <= End) { - LiveOutSeen.set(MBB->getNumber()); - LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); - } + if (BlockEnd <= End) + LRC.setLiveOutValue(MBB, VNI); + // Skip to the next block for live-in. ++MBB; BlockStart = BlockEnd; @@ -866,25 +880,19 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LI->extendInBlock(BlockStart, - std::min(BlockEnd, End).getPrevSlot()); + VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); - if (End >= BlockEnd) { - // Live-out as well. - LiveOutSeen.set(MBB->getNumber()); - LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); - } + if (End >= BlockEnd) + LRC.setLiveOutValue(MBB, VNI); // Live-out as well. } else { - // This block needs a live-in value. - LiveInBlocks.push_back(MDT[MBB]); - // The last block covered may not be live-out. + // This block needs a live-in value. The last block covered may not + // be live-out. if (End < BlockEnd) - LiveInBlocks.back().Kill = End; + LRC.addLiveInBlock(LI, MDT[MBB], End); else { - // Live-out, but we need updateSSA to tell us the value. - LiveOutSeen.set(MBB->getNumber()); - LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, - (MachineDomTreeNode*)0); + // Live-through, and we don't know the value. + LRC.addLiveInBlock(LI, MDT[MBB]); + LRC.setLiveOutValue(MBB, 0); } } BlockStart = BlockEnd; @@ -895,8 +903,11 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << '\n'); } - if (!LiveInBlocks.empty()) - updateSSA(); + LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + if (SpillMode) + LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); return Skipped; } @@ -909,16 +920,20 @@ void SplitEditor::extendPHIKillRanges() { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + LiveInterval *LI = Edit->get(RegIdx); + LiveRangeCalc &LRC = getLRCalc(RegIdx); MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex End = LIS.getMBBEndIdx(*PI); + SlotIndex LastUse = End.getPrevSlot(); // The predecessor may not have a live-out value. That is OK, like an // undef PHI operand. 
- if (Edit->getParent().liveAt(End)) { - assert(RegAssign.lookup(End) == RegIdx && + if (Edit->getParent().liveAt(LastUse)) { + assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - extendRange(RegIdx, End); + LRC.extend(LI, End, + LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); } } } @@ -938,25 +953,22 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { continue; } - // <undef> operands don't really read the register, so just assign them to - // the complement. - if (MO.isUse() && MO.isUndef()) { - MO.setReg(Edit->get(0)->reg); - continue; - } - + // <undef> operands don't really read the register, so it doesn't matter + // which register we choose. When the use operand is tied to a def, we must + // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); - if (MO.isDef()) + if (MO.isDef() || MO.isUndef()) Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - MO.setReg(Edit->get(RegIdx)->reg); + LiveInterval *LI = Edit->get(RegIdx); + MO.setReg(LI->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); // Extend liveness to Idx if the instruction reads reg. - if (!ExtendRanges) + if (!ExtendRanges || MO.isUndef()) continue; // Skip instructions that don't read Reg. @@ -971,7 +983,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getUseIndex(); - extendRange(RegIdx, Idx); + getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), + &MDT, &LIS.getVNInfoAllocator()); } } @@ -1019,11 +1032,24 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { VNI->setIsPHIDef(ParentVNI->isPHIDef()); VNI->setCopy(ParentVNI->getCopy()); - // Mark rematted values as complex everywhere to force liveness computation. + // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. if (Edit->didRematerialize(ParentVNI)) for (unsigned i = 0, e = Edit->size(); i != e; ++i) - markComplexMapped(i, ParentVNI); + forceRecompute(i, ParentVNI); + } + + // Hoist back-copies to the complement interval when in spill mode. + switch (SpillMode) { + case SM_Partition: + // Leave all back-copies as is. + break; + case SM_Size: + hoistCopiesForSize(); + break; + case SM_Speed: + llvm_unreachable("Spill mode 'speed' not implemented yet"); + break; } // Transfer the simply mapped values, check if any are skipped. @@ -1081,50 +1107,39 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { // Single Block Splitting //===----------------------------------------------------------------------===// -/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it -/// may be an advantage to split CurLI for the duration of the block. -bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { - // If CurLI is local to one block, there is no point to splitting it. - if (UseBlocks.size() <= 1) +bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI, + bool SingleInstrs) const { + // Always split for multiple instructions. + if (!BI.isOneInstr()) + return true; + // Don't split for single instructions unless explicitly requested. + if (!SingleInstrs) return false; - // Add blocks with multiple uses. 
- for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) { - const BlockInfo &BI = UseBlocks[i]; - if (BI.FirstUse == BI.LastUse) - continue; - Blocks.insert(BI.MBB); - } - return !Blocks.empty(); + // Splitting a live-through range always makes progress. + if (BI.LiveIn && BI.LiveOut) + return true; + // No point in isolating a copy. It has no register class constraints. + if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike()) + return false; + // Finally, don't isolate an end point that was created by earlier splits. + return isOriginalEndpoint(BI.FirstInstr); } void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { openIntv(); SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); - SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr, LastSplitPoint)); - if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { - useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastInstr)); } else { // The last use is after the last valid split point. SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); useIntv(SegStart, SegStop); - overlapIntv(SegStop, BI.LastUse); + overlapIntv(SegStop, BI.LastInstr); } } -/// splitSingleBlocks - Split CurLI into a separate live interval inside each -/// basic block in Blocks. -void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { - DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); - ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks(); - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - if (Blocks.count(BI.MBB)) - splitSingleBlock(BI); - } - finish(); -} - //===----------------------------------------------------------------------===// // Global Live Range Splitting Support @@ -1149,6 +1164,12 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block"); + assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf"); + assert((!EnterAfter || EnterAfter >= Start) && "Interference before block"); + + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + if (!IntvOut) { DEBUG(dbgs() << ", spill on entry.\n"); // @@ -1157,7 +1178,6 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, // -____________ Spill on entry. // selectIntv(IntvIn); - MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); SlotIndex Idx = leaveIntvAtTop(*MBB); assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); (void)Idx; @@ -1172,7 +1192,6 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, // ___________-- Reload on exit. // selectIntv(IntvOut); - MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); SlotIndex Idx = enterIntvAtEnd(*MBB); assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); (void)Idx; @@ -1192,6 +1211,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, // We cannot legally insert splits after LSP. 
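With getMultiUseBlocks()/splitSingleBlocks() removed above, callers now decide per block via shouldSplitSingleBlock() and invoke splitSingleBlock() themselves. A hedged sketch of that driver loop, assuming SA and SE are a prepared SplitAnalysis/SplitEditor pair for the same live range (the helper is illustrative, not code from this patch):

  #include "SplitKit.h"
  #include "llvm/ADT/ArrayRef.h"
  using namespace llvm;

  static bool splitAroundUses(SplitAnalysis &SA, SplitEditor &SE,
                              bool SingleInstrs) {
    ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
    bool DidSplit = false;
    for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
      const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
      if (!SA.shouldSplitSingleBlock(BI, SingleInstrs))
        continue;
      SE.splitSingleBlock(BI);   // opens its own interval, does not finish()
      DidSplit = true;
    }
    if (DidSplit)
      SE.finish();
    return DidSplit;
  }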
SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf"); if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { @@ -1201,10 +1221,14 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, // |-----------| Live through. // ------======= Switch intervals between interference. // - SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP; selectIntv(IntvOut); - SlotIndex Idx = enterIntvBefore(Cut); - useIntv(Idx, Stop); + SlotIndex Idx; + if (LeaveBefore && LeaveBefore < LSP) { + Idx = enterIntvBefore(LeaveBefore); + useIntv(Idx, Stop); + } else { + Idx = enterIntvAtEnd(*MBB); + } selectIntv(IntvIn); useIntv(Start, Idx); assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); @@ -1238,7 +1262,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop - << "), uses " << BI.FirstUse << '-' << BI.LastUse + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-in " << IntvIn << ", leave before " << LeaveBefore << (BI.LiveOut ? ", stack-out" : ", killed in block")); @@ -1246,7 +1270,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, assert(BI.LiveIn && "Must be live-in"); assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); - if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) { + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) { DEBUG(dbgs() << " before interference.\n"); // // <<< Interference after kill. @@ -1254,13 +1278,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, // ========= Use IntvIn everywhere. // selectIntv(IntvIn); - useIntv(Start, BI.LastUse); + useIntv(Start, BI.LastInstr); return; } SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); - if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) { + if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) { // // <<< Possible interference after last use. // |---o---o---| Live-out on stack. @@ -1271,17 +1295,17 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, // ============ Copy to stack after LSP, overlap IntvIn. // \_____ Stack interval is live-out. // - if (BI.LastUse < LSP) { + if (BI.LastInstr < LSP) { DEBUG(dbgs() << ", spill after last use before interference.\n"); selectIntv(IntvIn); - SlotIndex Idx = leaveIntvAfter(BI.LastUse); + SlotIndex Idx = leaveIntvAfter(BI.LastInstr); useIntv(Start, Idx); assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); } else { DEBUG(dbgs() << ", spill before last split point.\n"); selectIntv(IntvIn); SlotIndex Idx = leaveIntvBefore(LSP); - overlapIntv(Idx, BI.LastUse); + overlapIntv(Idx, BI.LastInstr); useIntv(Start, Idx); assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); } @@ -1295,13 +1319,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, (void)LocalIntv; DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); - if (!BI.LiveOut || BI.LastUse < LSP) { + if (!BI.LiveOut || BI.LastInstr < LSP) { // // <<<<<<< Interference overlapping uses. // |---o---o---| Live-out on stack. // =====----____ Leave IntvIn before interference, then spill. 
// - SlotIndex To = leaveIntvAfter(BI.LastUse); + SlotIndex To = leaveIntvAfter(BI.LastInstr); SlotIndex From = enterIntvBefore(LeaveBefore); useIntv(From, To); selectIntv(IntvIn); @@ -1316,7 +1340,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, // \_____ Stack interval is live-out. // SlotIndex To = leaveIntvBefore(LSP); - overlapIntv(To, BI.LastUse); + overlapIntv(To, BI.LastInstr); SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); useIntv(From, To); selectIntv(IntvIn); @@ -1330,7 +1354,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop - << "), uses " << BI.FirstUse << '-' << BI.LastUse + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-out " << IntvOut << ", enter after " << EnterAfter << (BI.LiveIn ? ", stack-in" : ", defined in block")); @@ -1340,7 +1364,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, assert(BI.LiveOut && "Must be live-out"); assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); - if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) { + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) { DEBUG(dbgs() << " after interference.\n"); // // >>>> Interference before def. @@ -1348,11 +1372,11 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, // ========= Use IntvOut everywhere. // selectIntv(IntvOut); - useIntv(BI.FirstUse, Stop); + useIntv(BI.FirstInstr, Stop); return; } - if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) { + if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) { DEBUG(dbgs() << ", reload after interference.\n"); // // >>>> Interference before def. @@ -1360,7 +1384,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, // ____========= Enter IntvOut before first use. // selectIntv(IntvOut); - SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse)); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr)); useIntv(Idx, Stop); assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); return; @@ -1381,6 +1405,6 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); openIntv(); - SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse)); + SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr)); useIntv(From, Idx); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 7948b72..d8fc212 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -15,13 +15,11 @@ #ifndef LLVM_CODEGEN_SPLITKIT_H #define LLVM_CODEGEN_SPLITKIT_H +#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/SlotIndexes.h" namespace llvm { @@ -38,12 +36,6 @@ class VirtRegMap; class VNInfo; class raw_ostream; -/// At some point we should just include MachineDominators.h: -class MachineDominatorTree; -template <class NodeT> class DomTreeNodeBase; -typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; - - /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting /// opportunities. 
class SplitAnalysis { @@ -76,16 +68,16 @@ public: /// struct BlockInfo { MachineBasicBlock *MBB; - SlotIndex FirstUse; ///< First instr using current reg. - SlotIndex LastUse; ///< Last instr using current reg. - bool LiveThrough; ///< Live in whole block (Templ 5. above). + SlotIndex FirstInstr; ///< First instr accessing current reg. + SlotIndex LastInstr; ///< Last instr accessing current reg. + SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex(). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. /// isOneInstr - Returns true when this BlockInfo describes a single /// instruction. bool isOneInstr() const { - return SlotIndex::isSameInstr(FirstUse, LastUse); + return SlotIndex::isSameInstr(FirstInstr, LastInstr); } }; @@ -185,10 +177,15 @@ public: typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from - /// having CurLI split to a new live interval. Return true if Blocks can be - /// passed to SplitEditor::splitSingleBlocks. - bool getMultiUseBlocks(BlockPtrSet &Blocks); + /// shouldSplitSingleBlock - Returns true if it would help to create a local + /// live range for the instructions in BI. There is normally no benefit to + /// creating a live range for a single instruction, but it does enable + /// register class inflation if the instruction has a restricted register + /// class. + /// + /// @param BI The block to be isolated. + /// @param SingleInstrs True when single instructions should be isolated. + bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const; }; @@ -212,6 +209,36 @@ class SplitEditor { const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; +public: + + /// ComplementSpillMode - Select how the complement live range should be + /// created. SplitEditor automatically creates interval 0 to contain + /// anything that isn't added to another interval. This complement interval + /// can get quite complicated, and it can sometimes be an advantage to allow + /// it to overlap the other intervals. If it is going to spill anyway, no + /// registers are wasted by keeping a value in two places at the same time. + enum ComplementSpillMode { + /// SM_Partition(Default) - Try to create the complement interval so it + /// doesn't overlap any other intervals, and the original interval is + /// partitioned. This may require a large number of back copies and extra + /// PHI-defs. Only segments marked with overlapIntv will be overlapping. + SM_Partition, + + /// SM_Size - Overlap intervals to minimize the number of inserted COPY + /// instructions. Copies to the complement interval are hoisted to their + /// common dominator, so only one COPY is required per value in the + /// complement interval. This also means that no extra PHI-defs need to be + /// inserted in the complement interval. + SM_Size, + + /// SM_Speed - Overlap intervals to minimize the expected execution + /// frequency of the inserted copies. This is very similar to SM_Size, but + /// the complement interval may get some extra PHI-defs. + SM_Speed + }; + +private: + /// Edit - The current parent register and new intervals created. LiveRangeEdit *Edit; @@ -220,6 +247,9 @@ class SplitEditor { /// openIntv will be 1. unsigned OpenIdx; + /// The current spill mode, selected by reset(). + ComplementSpillMode SpillMode; + typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; /// Allocator for the interval map. 
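The ComplementSpillMode values documented above are selected through the new reset() overload declared later in this header. A minimal usage sketch, assuming SE is an existing SplitEditor and LRE the LiveRangeEdit for the register being split; only the reset() calls themselves are taken from this diff:

  // Pick how the complement interval (interval 0) may overlap the others.
  static void pickSpillMode(SplitEditor &SE, LiveRangeEdit &LRE, bool ForSize) {
    if (ForSize)
      SE.reset(LRE, SplitEditor::SM_Size);  // hoist back-copies, fewer COPYs
    else
      SE.reset(LRE);                        // default SM_Partition, old behaviour
    // SM_Speed is declared, but finish() in this patch still reports it as
    // unimplemented.
  }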
This will eventually be shared with @@ -231,65 +261,34 @@ class SplitEditor { /// Idx. RegAssignMap RegAssign; - typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap; + typedef PointerIntPair<VNInfo*, 1> ValueForcePair; + typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap; /// Values - keep track of the mapping from parent values to values in the new /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains: /// /// 1. No entry - the value is not mapped to Edit.get(RegIdx). - /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx). - /// Each value is represented by a minimal live range at its def. - /// 3. A non-null VNInfo - the value is mapped to a single new value. + /// 2. (Null, false) - the value is mapped to multiple values in + /// Edit.get(RegIdx). Each value is represented by a minimal live range at + /// its def. The full live range can be inferred exactly from the range + /// of RegIdx in RegAssign. + /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and + /// the live range must be recomputed using LiveRangeCalc::extend(). + /// 4. (VNI, false) The value is mapped to a single new value. /// The new value has no live ranges anywhere. ValueMap Values; - typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; - typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; - - // LiveOutCache - Map each basic block where a new register is live out to the - // live-out value and its defining block. - // One of these conditions shall be true: - // - // 1. !LiveOutCache.count(MBB) - // 2. LiveOutCache[MBB].second.getNode() == MBB - // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] - // - // This is only a cache, the values can be computed as: - // - // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB)) - // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] - // - // The cache is also used as a visited set by extendRange(). It can be shared - // by all the new registers because at most one is live out of each block. - LiveOutMap LiveOutCache; - - // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid - // entry in LiveOutCache. - BitVector LiveOutSeen; - - /// LiveInBlock - Info for updateSSA() about a block where a register is - /// live-in. - /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA() - /// adds the computed live-in value. - struct LiveInBlock { - // Dominator tree node for the block. - // Cleared by updateSSA when the final value has been determined. - MachineDomTreeNode *DomNode; - - // Live-in value filled in by updateSSA once it is known. - VNInfo *Value; - - // Position in block where the live-in range ends, or SlotIndex() if the - // range passes through the block. - SlotIndex Kill; - - LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {} - }; + /// LRCalc - Cache for computing live ranges and SSA update. Each instance + /// can only handle non-overlapping live ranges, so use a separate + /// LiveRangeCalc instance for the complement interval when in spill mode. + LiveRangeCalc LRCalc[2]; - /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and - /// updateSSA(). This list is usually empty, it exists here to avoid frequent - /// reallocations. - SmallVector<LiveInBlock, 16> LiveInBlocks; + /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the + /// complement interval can overlap the other intervals, so it gets its own + /// LRCalc instance. 
When not in spill mode, all intervals can share one. + LiveRangeCalc &getLRCalc(unsigned RegIdx) { + return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; + } /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within @@ -298,9 +297,11 @@ class SplitEditor { /// Return the new LI value. VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); - /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless - /// of the number of defs. - void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI); + /// forceRecompute - Force the live range of ParentVNI in RegIdx to be + /// recomputed by LiveRangeCalc::extend regardless of the number of defs. + /// This is used for values whose live range doesn't match RegAssign exactly. + /// They could have rematerialized, or back-copies may have been moved. + void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI); /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. @@ -310,22 +311,18 @@ class SplitEditor { MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx. - /// Insert PHIDefs as needed to preserve SSA form. - void extendRange(unsigned RegIdx, SlotIndex Idx); + /// removeBackCopies - Remove the copy instructions that defines the values + /// in the vector in the complement interval. + void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies); - /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all - /// reaching defs for LI are found. - /// @param LI Live interval whose value is needed. - /// @param MBB Block where LI should be live-in. - /// @param Kill Kill point in MBB. - /// @return Unique value seen, or NULL. - VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB, - SlotIndex Kill); + /// getShallowDominator - Returns the least busy dominator of MBB that is + /// also dominated by DefMBB. Busy is measured by loop depth. + MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB); - /// updateSSA - Compute and insert PHIDefs such that all blocks in - // LiveInBlocks get a known live-in value. Add live ranges to the blocks. - void updateSSA(); + /// hoistCopiesForSize - Hoist back-copies to the complement interval in a + /// way that minimizes code size. This implements the SM_Size spill mode. + void hoistCopiesForSize(); /// transferValues - Transfer values to the new ranges. /// Return true if any ranges were skipped. @@ -348,7 +345,7 @@ public: MachineDominatorTree&); /// reset - Prepare for a new split. - void reset(LiveRangeEdit&); + void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); /// Create a new virtual register and live interval. /// Return the interval index, starting from 1. Interval index 0 is the @@ -423,10 +420,6 @@ public: /// split, and doesn't call finish(). void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); - /// splitSingleBlocks - Split CurLI into a separate live interval inside each - /// basic block in Blocks. - void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); - /// splitLiveThroughBlock - Split CurLI in the given block such that it /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in /// the block, but they will be ignored when placing split points. 
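The Values map documented in the SplitKit.h hunk above packs a VNInfo pointer and a "force recompute" bit into one PointerIntPair. A small, self-contained sketch of how its four documented states are told apart, mirroring the checks made by forceRecompute() and transferValues() earlier in this diff; the helper and its strings are illustrative, not part of the patch:

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/PointerIntPair.h"
  #include "llvm/CodeGen/LiveInterval.h"   // VNInfo
  #include <utility>
  using namespace llvm;

  // Local mirror of the private typedefs in SplitEditor.
  typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
  typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;

  static const char *classifyMapping(ValueMap &Values, unsigned RegIdx,
                                     unsigned ParentVNIId) {
    ValueMap::iterator I = Values.find(std::make_pair(RegIdx, ParentVNIId));
    if (I == Values.end())
      return "1: not mapped into Edit.get(RegIdx)";
    ValueForcePair VFP = I->second;
    if (VFP.getPointer())
      return "4: simple mapping to a single new value";
    if (VFP.getInt())
      return "3: complex, must be recomputed by LiveRangeCalc::extend()";
    return "2: complex, live range follows RegAssign exactly";
  }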
diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp index ec75df4..77973b7 100644 --- a/contrib/llvm/lib/CodeGen/Splitter.cpp +++ b/contrib/llvm/lib/CodeGen/Splitter.cpp @@ -11,7 +11,6 @@ #include "Splitter.h" -#include "RegisterCoalescer.h" #include "llvm/Module.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -20,6 +19,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -262,7 +262,7 @@ namespace llvm { au.addPreserved<MachineDominatorTree>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); - au.addPreserved<RegisterCoalescer>(); + au.addPreservedID(RegisterCoalescerPassID); au.addPreserved<CalculateSpillWeights>(); au.addPreserved<LiveStacks>(); au.addRequired<SlotIndexes>(); diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index d3cbd15..1f0e5a2 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -123,7 +123,7 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { + if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays. if (!AT->getElementType()->isIntegerTy(8)) continue; @@ -165,7 +165,7 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp index 227eb47..260cc0e 100644 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp @@ -47,6 +47,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -673,7 +674,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, if (PHIColor && SrcColor == PHIColor) { LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); - VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); + VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); assert(SrcVNI); SrcVNI->setHasPHIKill(true); continue; diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 6b801cb..3a6211a 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp 
b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp index 86e71d8..f32678f 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -74,23 +74,25 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); - bool ChangeReg0 = false; - if (HasDef && MI->getOperand(0).getReg() == Reg1) { - // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && - "Expecting a two-address instruction!"); + // If destination is tied to either of the commuted source register, then + // it must be updated. + if (HasDef && Reg0 == Reg1 && + MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; - ChangeReg0 = true; + Reg0 = Reg2; + } else if (HasDef && Reg0 == Reg2 && + MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { + Reg1IsKill = false; + Reg0 = Reg1; } if (NewMI) { // Create a new instruction. - unsigned Reg0 = HasDef - ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0; bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false; MachineFunction &MF = *MI->getParent()->getParent(); if (HasDef) @@ -104,8 +106,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, .addReg(Reg1, getKillRegState(Reg2IsKill)); } - if (ChangeReg0) - MI->getOperand(0).setReg(Reg2); + if (HasDef) + MI->getOperand(0).setReg(Reg0); MI->getOperand(Idx2).setReg(Reg1); MI->getOperand(Idx1).setReg(Reg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); @@ -160,6 +162,42 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, return MadeChange; } +bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + return false; +} + +bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + return false; +} + void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, @@ -324,6 +362,19 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + // Remat clients assume operand 0 is the defined register. 
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) + return false; + unsigned DefReg = MI->getOperand(0).getReg(); + + // A sub-register definition can only be rematerialized if the instruction + // doesn't read the other parts of the register. Otherwise it is really a + // read-modify-write operation on the full virtual register which cannot be + // moved safely. + if (TargetRegisterInfo::isVirtualRegister(DefReg) && + MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) + return false; + // A load from a fixed stack slot can be rematerialized. This may be // redundant with subsequent checks, but it's target-independent, // simple, and a common case. @@ -383,8 +434,9 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, continue; } - // Only allow one virtual-register def, and that in the first operand. - if (MO.isDef() != (i == 0)) + // Only allow one virtual-register def. There may be multiple defs of the + // same virtual register, though. + if (MO.isDef() && Reg != DefReg) return false; // Don't allow any virtual-register uses. Rematting an instruction with diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a3c5620..fb87154 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,153 +43,6 @@ using namespace dwarf; // ELF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() - : TargetLoweringObjectFile(), - TLSDataSection(0), - TLSBSSSection(0), - DataRelSection(0), - DataRelLocalSection(0), - DataRelROSection(0), - DataRelROLocalSection(0), - MergeableConst4Section(0), - MergeableConst8Section(0), - MergeableConst16Section(0) { -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getContext().getELFSection(".bss", ELF::SHT_NOBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getContext().getELFSection(".text", ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | - ELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getContext().getELFSection(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getContext().getELFSection(".tdata", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_TLS | - ELF::SHF_WRITE, - SectionKind::getThreadData()); - - TLSBSSSection = - getContext().getELFSection(".tbss", ELF::SHT_NOBITS, - ELF::SHF_ALLOC | ELF::SHF_TLS | - ELF::SHF_WRITE, - SectionKind::getThreadBSS()); - - DataRelSection = - getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - 
getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getContext().getELFSection(".ctors", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getContext().getELFSection(".dtors", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, - SectionKind::getReadOnly()); - // Debug Info Sections. - DwarfAbbrevSection = - getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { - return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, - SectionKind::getDataRel()); -} - MCSymbol * TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, @@ -493,221 +346,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// -TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() - : TargetLoweringObjectFile(), - TLSDataSection(0), - TLSBSSSection(0), - TLSTLVSection(0), - TLSThreadInitSection(0), - CStringSection(0), - UStringSection(0), - TextCoalSection(0), - ConstTextCoalSection(0), - ConstDataSection(0), - DataCoalSection(0), - DataCommonSection(0), - DataBSSSection(0), - FourByteConstantSection(0), - EightByteConstantSection(0), - SixteenByteConstantSection(0), - LazySymbolPointerSection(0), - 
NonLazySymbolPointerSection(0) { -} - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - IsFunctionEHFrameSymbolPrivate = false; - SupportsWeakOmittedEHFrame = false; - - // .comm doesn't support alignment before Leopard. - Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); - if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) - CommDirectiveSupportsAlignment = false; - - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getContext().getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getContext().getMachOSection("__DATA", "__data", 0, - SectionKind::getDataRel()); - - TLSDataSection // .tdata - = getContext().getMachOSection("__DATA", "__thread_data", - MCSectionMachO::S_THREAD_LOCAL_REGULAR, - SectionKind::getDataRel()); - TLSBSSSection // .tbss - = getContext().getMachOSection("__DATA", "__thread_bss", - MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, - SectionKind::getThreadBSS()); - - // TODO: Verify datarel below. - TLSTLVSection // .tlv - = getContext().getMachOSection("__DATA", "__thread_vars", - MCSectionMachO::S_THREAD_LOCAL_VARIABLES, - SectionKind::getDataRel()); - - TLSThreadInitSection - = getContext().getMachOSection("__DATA", "__thread_init", - MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, - SectionKind::getDataRel()); - - CStringSection // .cstring - = getContext().getMachOSection("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getContext().getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getContext().getMachOSection("__TEXT", "__literal4", - MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getContext().getMachOSection("__TEXT", "__literal8", - MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. 
- SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getContext().getMachOSection("__TEXT", "__literal16", - MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getContext().getMachOSection("__TEXT", "__const", 0, - SectionKind::getReadOnly()); - - TextCoalSection - = getContext().getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getContext().getMachOSection("__TEXT", "__const_coal", - MCSectionMachO::S_COALESCED, - SectionKind::getReadOnly()); - ConstDataSection // .const_data - = getContext().getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getContext().getMachOSection("__DATA","__datacoal_nt", - MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getContext().getMachOSection("__DATA","__common", - MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getContext().getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getContext().getMachOSection("__TEXT", "__constructor", 0, - SectionKind::getDataRel()); - StaticDtorSection - = getContext().getMachOSection("__TEXT", "__destructor", 0, - SectionKind::getDataRel()); - } else { - StaticCtorSection - = getContext().getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getContext().getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. - LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); - - if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) - CompactUnwindSection = - getContext().getMachOSection("__LD", "__compact_unwind", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getReadOnly()); - - // Debug Information. 
- DwarfAbbrevSection = - getContext().getMachOSection("__DWARF", "__debug_abbrev", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getContext().getMachOSection("__DWARF", "__debug_info", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getContext().getMachOSection("__DWARF", "__debug_line", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getContext().getMachOSection("__DWARF", "__debug_frame", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getContext().getMachOSection("__DWARF", "__debug_pubnames", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getContext().getMachOSection("__DWARF", "__debug_pubtypes", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getContext().getMachOSection("__DWARF", "__debug_str", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getContext().getMachOSection("__DWARF", "__debug_loc", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getContext().getMachOSection("__DWARF", "__debug_aranges", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getContext().getMachOSection("__DWARF", "__debug_ranges", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getContext().getMachOSection("__DWARF", "__debug_macinfo", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getContext().getMachOSection("__DWARF", "__debug_inlined", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - - TLSExtraDataSection = TLSTLVSection; -} - -const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const { - return getContext().getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); -} - const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -905,183 +543,10 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, return SSym; } -unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; -} - -unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { - return DW_EH_PE_pcrel; -} - -unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const { - return DW_EH_PE_pcrel; -} - -unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; -} - //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF() - : TargetLoweringObjectFile(), - DrectveSection(0), - PDataSection(0), - XDataSection(0) { -} - -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = - getContext().getCOFFSection(".text", - COFF::IMAGE_SCN_CNT_CODE | - COFF::IMAGE_SCN_MEM_EXECUTE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getText()); - DataSection = - 
getContext().getCOFFSection(".data", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - ReadOnlySection = - getContext().getCOFFSection(".rdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); - StaticCtorSection = - getContext().getCOFFSection(".ctors", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getCOFFSection(".dtors", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getContext().getCOFFSection(".gcc_except_table", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); - // Debug info. - DwarfAbbrevSection = - getContext().getCOFFSection(".debug_abbrev", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfInfoSection = - getContext().getCOFFSection(".debug_info", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLineSection = - getContext().getCOFFSection(".debug_line", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfFrameSection = - getContext().getCOFFSection(".debug_frame", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getContext().getCOFFSection(".debug_pubnames", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getContext().getCOFFSection(".debug_pubtypes", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfStrSection = - getContext().getCOFFSection(".debug_str", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLocSection = - getContext().getCOFFSection(".debug_loc", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfARangesSection = - getContext().getCOFFSection(".debug_aranges", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfRangesSection = - getContext().getCOFFSection(".debug_ranges", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getContext().getCOFFSection(".debug_macinfo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - - DrectveSection = - getContext().getCOFFSection(".drectve", - COFF::IMAGE_SCN_LNK_INFO, - SectionKind::getMetadata()); - - PDataSection = - getContext().getCOFFSection(".pdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - - XDataSection = - getContext().getCOFFSection(".xdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); -} - -const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() 
const { - return getContext().getCOFFSection(".eh_frame", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); -} - -const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection( - StringRef suffix) const { - if (suffix == "") - return PDataSection; - return getContext().getCOFFSection((".pdata"+suffix).str(), - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); -} - -const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection( - StringRef suffix) const { - if (suffix == "") - return XDataSection; - return getContext().getCOFFSection((".xdata"+suffix).str(), - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); -} - - static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 6d6244e..d879378 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -177,6 +177,10 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, unsigned SavedReg, MachineBasicBlock::iterator OldPos) { + // FIXME: Shouldn't we be trying to do this before we three-addressify the + // instruction? After this transformation is done, we no longer need + // the instruction to be in three-address form. + // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. if (!MI->isSafeToMove(TII, AA, SeenStore)) @@ -217,7 +221,11 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, break; } - if (!KillMI || KillMI->getParent() != MBB || KillMI == MI) + // If we find the instruction that kills SavedReg, and it is in an + // appropriate location, we can try to sink the current instruction + // past it. + if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || + KillMI->getDesc().isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -1041,6 +1049,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); + // This pass takes the function out of SSA form. + MRI->leaveSSA(); + // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
BitVector ReMatRegs(MRI->getNumVirtRegs()); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 7557979..8a1cdc0 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -41,8 +41,8 @@ #include <algorithm> using namespace llvm; -STATISTIC(NumSpills , "Number of register spills"); -STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); +STATISTIC(NumSpillSlots, "Number of spill slots allocated"); +STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); //===----------------------------------------------------------------------===// // VirtRegMap implementation @@ -111,6 +111,7 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { unsigned Idx = SS-LowSpillSlot; while (Idx >= SpillSlotToUsesMap.size()) SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + ++NumSpillSlots; return SS; } @@ -130,7 +131,6 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - ++NumSpills; return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); } @@ -285,14 +285,24 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { // Preserve semantics of sub-register operands. if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may - // have to add <imp-use,kill> operands for the super-register. - if (MO.isUse()) { - if (MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - } else if (MO.isDead()) - SuperDeads.push_back(PhysReg); - else - SuperDefs.push_back(PhysReg); + // have to add <imp-use,kill> operands for the super-register. A + // partial redef always kills and redefines the super-register. + if (MO.readsReg() && (MO.isDef() || MO.isKill())) + SuperKills.push_back(PhysReg); + + if (MO.isDef()) { + // The <def,undef> flag only makes sense for sub-register defs, and + // we are substituting a full physreg. An <imp-use,kill> operand + // from the SuperKills list will represent the partial read of the + // super-register. + MO.setIsUndef(false); + + // Also add implicit defs for the super-register. + if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); + } // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); diff --git a/contrib/llvm/lib/CompilerDriver/Action.cpp b/contrib/llvm/lib/CompilerDriver/Action.cpp deleted file mode 100644 index a8d625c..0000000 --- a/contrib/llvm/lib/CompilerDriver/Action.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Action class - implementation and auxiliary functions. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/Action.h" -#include "llvm/CompilerDriver/BuiltinOptions.h" -#include "llvm/CompilerDriver/Error.h" -#include "llvm/CompilerDriver/Main.h" - -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/SystemUtils.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/TimeValue.h" - -#include <stdexcept> -#include <string> - -using namespace llvm; -using namespace llvmc; - -namespace llvmc { - -extern const char* ProgramName; - -} - -namespace { - - void PrintString (const std::string& str) { - errs() << str << ' '; - } - - void PrintCommand (const std::string& Cmd, const StrVector& Args) { - errs() << Cmd << ' '; - std::for_each(Args.begin(), Args.end(), &PrintString); - errs() << '\n'; - } - - bool IsSegmentationFault (int returnCode) { -#ifdef LLVM_ON_WIN32 - return (returnCode >= 0xc0000000UL) -#else - return (returnCode < 0); -#endif - } - - int ExecuteProgram (const std::string& name, const StrVector& args) { - sys::Path prog(name); - - if (sys::path::is_relative(prog.str())) { - prog = PrependMainExecutablePath(name, ProgramName, - (void *)(intptr_t)&Main); - - if (!prog.canExecute()) { - prog = sys::Program::FindProgramByName(name); - if (prog.isEmpty()) { - PrintError("Can't find program '" + name + "'"); - return -1; - } - } - } - if (!prog.canExecute()) { - PrintError("Program '" + name + "' is not executable."); - return -1; - } - - // Build the command line vector and the redirects array. - const sys::Path* redirects[3] = {0,0,0}; - sys::Path stdout_redirect; - - std::vector<const char*> argv; - argv.reserve((args.size()+2)); - argv.push_back(name.c_str()); - - for (StrVector::const_iterator B = args.begin(), E = args.end(); - B!=E; ++B) { - if (*B == ">") { - ++B; - stdout_redirect.set(*B); - redirects[1] = &stdout_redirect; - } - else { - argv.push_back((*B).c_str()); - } - } - argv.push_back(0); // null terminate list. - - // Invoke the program. - int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]); - - if (IsSegmentationFault(ret)) { - errs() << "Segmentation fault: "; - PrintCommand(name, args); - } - - return ret; - } -} - -namespace llvmc { - void AppendToGlobalTimeLog (const std::string& cmd, double time); -} - -int llvmc::Action::Execute () const { - if (DryRun || VerboseMode) - PrintCommand(Command_, Args_); - - if (!DryRun) { - if (Time) { - sys::TimeValue now = sys::TimeValue::now(); - int ret = ExecuteProgram(Command_, Args_); - sys::TimeValue now2 = sys::TimeValue::now(); - now2 -= now; - double elapsed = now2.seconds() + now2.microseconds() / 1000000.0; - AppendToGlobalTimeLog(Command_, elapsed); - - return ret; - } - else { - return ExecuteProgram(Command_, Args_); - } - } - - return 0; -} diff --git a/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp b/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp deleted file mode 100644 index 3844203..0000000 --- a/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp +++ /dev/null @@ -1,61 +0,0 @@ -//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definitions of all global command-line option variables. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/BuiltinOptions.h" - -#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS -#include "llvm/Support/PluginLoader.h" -#endif - -namespace cl = llvm::cl; - -namespace llvmc { - -cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"), - cl::ZeroOrMore); -cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"), - cl::value_desc("file"), cl::Prefix); -cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"), - cl::value_desc("<directory>"), cl::Prefix); -cl::list<std::string> Languages("x", - cl::desc("Specify the language of the following input files"), - cl::ZeroOrMore); - -cl::opt<bool> DryRun("dry-run", - cl::desc("Only pretend to run commands")); -cl::opt<bool> Time("time", cl::desc("Time individual commands")); -cl::opt<bool> VerboseMode("v", - cl::desc("Enable verbose mode")); - -cl::opt<bool> CheckGraph("check-graph", - cl::desc("Check the compilation graph for errors"), - cl::Hidden); -cl::opt<bool> WriteGraph("write-graph", - cl::desc("Write compilation-graph.dot file"), - cl::Hidden); -cl::opt<bool> ViewGraph("view-graph", - cl::desc("Show compilation graph in GhostView"), - cl::Hidden); - -cl::opt<SaveTempsEnum::Values> SaveTemps -("save-temps", cl::desc("Keep temporary files"), - cl::init(SaveTempsEnum::Unset), - cl::values(clEnumValN(SaveTempsEnum::Obj, "obj", - "Save files in the directory specified with -o"), - clEnumValN(SaveTempsEnum::Cwd, "cwd", - "Use current working directory"), - clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"), - clEnumValEnd), - cl::ValueOptional); - -} // End namespace llvmc. diff --git a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp deleted file mode 100644 index 33c6566..0000000 --- a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp +++ /dev/null @@ -1,655 +0,0 @@ -//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Compilation graph - implementation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/BuiltinOptions.h" -#include "llvm/CompilerDriver/CompilationGraph.h" -#include "llvm/CompilerDriver/Error.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/DOTGraphTraits.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" - -#include <algorithm> -#include <cstring> -#include <iterator> -#include <limits> -#include <queue> - -using namespace llvm; -using namespace llvmc; - -namespace llvmc { - - const std::string* LanguageMap::GetLanguage(const sys::Path& File) const { - // Remove the '.'. - StringRef suf = sys::path::extension(File.str()).substr(1); - LanguageMap::const_iterator Lang = - this->find(suf.empty() ? "*empty*" : suf); - if (Lang == this->end()) { - PrintError("File '" + File.str() + "' has unknown suffix '" - + suf.str() + '\''); - return 0; - } - return &Lang->second; - } -} - -namespace { - - /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error. 
- template <class C> - const Edge* ChooseEdge(const C& EdgesContainer, - const InputLanguagesSet& InLangs, - const std::string& NodeName = "root") { - const Edge* MaxEdge = 0; - int MaxWeight = 0; - bool SingleMax = true; - - // TODO: fix calculation of SingleMax. - for (typename C::const_iterator B = EdgesContainer.begin(), - E = EdgesContainer.end(); B != E; ++B) { - const Edge* e = B->getPtr(); - int EW = e->Weight(InLangs); - if (EW < 0) { - // (error) invocation in TableGen -> we don't need to print an error - // message. - return 0; - } - if (EW > MaxWeight) { - MaxEdge = e; - MaxWeight = EW; - SingleMax = true; - } else if (EW == MaxWeight) { - SingleMax = false; - } - } - - if (!SingleMax) { - PrintError("Node " + NodeName + ": multiple maximal outward edges found!" - " Most probably a specification error."); - return 0; - } - if (!MaxEdge) { - PrintError("Node " + NodeName + ": no maximal outward edge found!" - " Most probably a specification error."); - return 0; - } - return MaxEdge; - } - -} - -void Node::AddEdge(Edge* Edg) { - // If there already was an edge between two nodes, modify it instead - // of adding a new edge. - const std::string& ToolName = Edg->ToolName(); - for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end(); - B != E; ++B) { - if ((*B)->ToolName() == ToolName) { - llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B); - return; - } - } - OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg)); -} - -CompilationGraph::CompilationGraph() { - NodesMap["root"] = Node(this); -} - -Node* CompilationGraph::getNode(const std::string& ToolName) { - nodes_map_type::iterator I = NodesMap.find(ToolName); - if (I == NodesMap.end()) { - PrintError("Node " + ToolName + " is not in the graph"); - return 0; - } - return &I->second; -} - -const Node* CompilationGraph::getNode(const std::string& ToolName) const { - nodes_map_type::const_iterator I = NodesMap.find(ToolName); - if (I == NodesMap.end()) { - PrintError("Node " + ToolName + " is not in the graph!"); - return 0; - } - return &I->second; -} - -// Find the tools list corresponding to the given language name. -const CompilationGraph::tools_vector_type* -CompilationGraph::getToolsVector(const std::string& LangName) const -{ - tools_map_type::const_iterator I = ToolsMap.find(LangName); - if (I == ToolsMap.end()) { - PrintError("No tool corresponding to the language " + LangName + " found"); - return 0; - } - return &I->second; -} - -void CompilationGraph::insertNode(Tool* V) { - if (NodesMap.count(V->Name()) == 0) - NodesMap[V->Name()] = Node(this, V); -} - -int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) { - Node* B = getNode(Edg->ToolName()); - if (B == 0) - return 1; - - if (A == "root") { - const char** InLangs = B->ToolPtr->InputLanguages(); - for (;*InLangs; ++InLangs) - ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg)); - NodesMap["root"].AddEdge(Edg); - } - else { - Node* N = getNode(A); - if (N == 0) - return 1; - - N->AddEdge(Edg); - } - // Increase the inward edge counter. - B->IncrInEdges(); - - return 0; -} - -// Pass input file through the chain until we bump into a Join node or -// a node that says that it is the last. 
-int CompilationGraph::PassThroughGraph (const sys::Path& InFile, - const Node* StartNode, - const InputLanguagesSet& InLangs, - const sys::Path& TempDir, - const LanguageMap& LangMap) const { - sys::Path In = InFile; - const Node* CurNode = StartNode; - - while(true) { - Tool* CurTool = CurNode->ToolPtr.getPtr(); - - if (CurTool->IsJoin()) { - JoinTool& JT = static_cast<JoinTool&>(*CurTool); - JT.AddToJoinList(In); - break; - } - - Action CurAction; - if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(), - TempDir, InLangs, LangMap)) { - return ret; - } - - if (int ret = CurAction.Execute()) - return ret; - - if (CurAction.StopCompilation()) - return 0; - - const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name()); - if (Edg == 0) - return 1; - - CurNode = getNode(Edg->ToolName()); - if (CurNode == 0) - return 1; - - In = CurAction.OutFile(); - } - - return 0; -} - -// Find the head of the toolchain corresponding to the given file. -// Also, insert an input language into InLangs. -const Node* CompilationGraph:: -FindToolChain(const sys::Path& In, const std::string* ForceLanguage, - InputLanguagesSet& InLangs, const LanguageMap& LangMap) const { - - // Determine the input language. - const std::string* InLang = (ForceLanguage ? ForceLanguage - : LangMap.GetLanguage(In)); - if (InLang == 0) - return 0; - const std::string& InLanguage = *InLang; - - // Add the current input language to the input language set. - InLangs.insert(InLanguage); - - // Find the toolchain for the input language. - const tools_vector_type* pTV = getToolsVector(InLanguage); - if (pTV == 0) - return 0; - - const tools_vector_type& TV = *pTV; - if (TV.empty()) { - PrintError("No toolchain corresponding to language " - + InLanguage + " found"); - return 0; - } - - const Edge* Edg = ChooseEdge(TV, InLangs); - if (Edg == 0) - return 0; - - return getNode(Edg->ToolName()); -} - -// Helper function used by Build(). -// Traverses initial portions of the toolchains (up to the first Join node). -// This function is also responsible for handling the -x option. -int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs, - const sys::Path& TempDir, - const LanguageMap& LangMap) { - // This is related to -x option handling. - cl::list<std::string>::const_iterator xIter = Languages.begin(), - xBegin = xIter, xEnd = Languages.end(); - bool xEmpty = true; - const std::string* xLanguage = 0; - unsigned xPos = 0, xPosNext = 0, filePos = 0; - - if (xIter != xEnd) { - xEmpty = false; - xPos = Languages.getPosition(xIter - xBegin); - cl::list<std::string>::const_iterator xNext = llvm::next(xIter); - xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max() - : Languages.getPosition(xNext - xBegin); - xLanguage = (*xIter == "none") ? 0 : &(*xIter); - } - - // For each input file: - for (cl::list<std::string>::const_iterator B = InputFilenames.begin(), - CB = B, E = InputFilenames.end(); B != E; ++B) { - sys::Path In = sys::Path(*B); - - // Code for handling the -x option. - // Output: std::string* xLanguage (can be NULL). - if (!xEmpty) { - filePos = InputFilenames.getPosition(B - CB); - - if (xPos < filePos) { - if (filePos < xPosNext) { - xLanguage = (*xIter == "none") ? 
0 : &(*xIter); - } - else { // filePos >= xPosNext - // Skip xIters while filePos > xPosNext - while (filePos > xPosNext) { - ++xIter; - xPos = xPosNext; - - cl::list<std::string>::const_iterator xNext = llvm::next(xIter); - if (xNext == xEnd) - xPosNext = std::numeric_limits<unsigned>::max(); - else - xPosNext = Languages.getPosition(xNext - xBegin); - xLanguage = (*xIter == "none") ? 0 : &(*xIter); - } - } - } - } - - // Find the toolchain corresponding to this file. - const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap); - if (N == 0) - return 1; - // Pass file through the chain starting at head. - if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap)) - return ret; - } - - return 0; -} - -// Sort the nodes in topological order. -int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) { - std::queue<const Node*> Q; - - Node* Root = getNode("root"); - if (Root == 0) - return 1; - - Q.push(Root); - - while (!Q.empty()) { - const Node* A = Q.front(); - Q.pop(); - Out.push_back(A); - for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); - EB != EE; ++EB) { - Node* B = getNode((*EB)->ToolName()); - if (B == 0) - return 1; - - B->DecrInEdges(); - if (B->HasNoInEdges()) - Q.push(B); - } - } - - return 0; -} - -namespace { - bool NotJoinNode(const Node* N) { - return N->ToolPtr ? !N->ToolPtr->IsJoin() : true; - } -} - -// Call TopologicalSort and filter the resulting list to include -// only Join nodes. -int CompilationGraph:: -TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) { - std::vector<const Node*> TopSorted; - if (int ret = TopologicalSort(TopSorted)) - return ret; - std::remove_copy_if(TopSorted.begin(), TopSorted.end(), - std::back_inserter(Out), NotJoinNode); - - return 0; -} - -int CompilationGraph::Build (const sys::Path& TempDir, - const LanguageMap& LangMap) { - InputLanguagesSet InLangs; - bool WasSomeActionGenerated = !InputFilenames.empty(); - - // Traverse initial parts of the toolchains and fill in InLangs. - if (int ret = BuildInitial(InLangs, TempDir, LangMap)) - return ret; - - std::vector<const Node*> JTV; - if (int ret = TopologicalSortFilterJoinNodes(JTV)) - return ret; - - // For all join nodes in topological order: - for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end(); - B != E; ++B) { - - const Node* CurNode = *B; - JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); - - // Are there any files in the join list? - if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty())) - continue; - - WasSomeActionGenerated = true; - Action CurAction; - if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(), - TempDir, InLangs, LangMap)) { - return ret; - } - - if (int ret = CurAction.Execute()) - return ret; - - if (CurAction.StopCompilation()) - return 0; - - const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name()); - if (Edg == 0) - return 1; - - const Node* NextNode = getNode(Edg->ToolName()); - if (NextNode == 0) - return 1; - - if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode, - InLangs, TempDir, LangMap)) { - return ret; - } - } - - if (!WasSomeActionGenerated) { - PrintError("no input files"); - return 1; - } - - return 0; -} - -int CompilationGraph::CheckLanguageNames() const { - int ret = 0; - - // Check that names for output and input languages on all edges do match. 
- for (const_nodes_iterator B = this->NodesMap.begin(), - E = this->NodesMap.end(); B != E; ++B) { - - const Node & N1 = B->second; - if (N1.ToolPtr) { - for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd(); - EB != EE; ++EB) { - const Node* N2 = this->getNode((*EB)->ToolName()); - if (N2 == 0) - return 1; - - if (!N2->ToolPtr) { - ++ret; - errs() << "Error: there is an edge from '" << N1.ToolPtr->Name() - << "' back to the root!\n\n"; - continue; - } - - const char** OutLangs = N1.ToolPtr->OutputLanguages(); - const char** InLangs = N2->ToolPtr->InputLanguages(); - bool eq = false; - const char* OutLang = 0; - for (;*OutLangs; ++OutLangs) { - OutLang = *OutLangs; - for (;*InLangs; ++InLangs) { - if (std::strcmp(OutLang, *InLangs) == 0) { - eq = true; - break; - } - } - } - - if (!eq) { - ++ret; - errs() << "Error: Output->input language mismatch in the edge '" - << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name() - << "'!\n" - << "Expected one of { "; - - InLangs = N2->ToolPtr->InputLanguages(); - for (;*InLangs; ++InLangs) { - errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'"); - } - - errs() << " }, but got '" << OutLang << "'!\n\n"; - } - - } - } - } - - return ret; -} - -int CompilationGraph::CheckMultipleDefaultEdges() const { - int ret = 0; - InputLanguagesSet Dummy; - - // For all nodes, just iterate over the outgoing edges and check if there is - // more than one edge with maximum weight. - for (const_nodes_iterator B = this->NodesMap.begin(), - E = this->NodesMap.end(); B != E; ++B) { - const Node& N = B->second; - int MaxWeight = -1024; - - // Ignore the root node. - if (!N.ToolPtr) - continue; - - for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd(); - EB != EE; ++EB) { - int EdgeWeight = (*EB)->Weight(Dummy); - if (EdgeWeight > MaxWeight) { - MaxWeight = EdgeWeight; - } - else if (EdgeWeight == MaxWeight) { - ++ret; - errs() << "Error: there are multiple maximal edges stemming from the '" - << N.ToolPtr->Name() << "' node!\n\n"; - break; - } - } - } - - return ret; -} - -int CompilationGraph::CheckCycles() { - unsigned deleted = 0; - std::queue<Node*> Q; - - Node* Root = getNode("root"); - if (Root == 0) - return 1; - - Q.push(Root); - - // Try to delete all nodes that have no ingoing edges, starting from the - // root. If there are any nodes left after this operation, then we have a - // cycle. This relies on '--check-graph' not performing the topological sort. - while (!Q.empty()) { - Node* A = Q.front(); - Q.pop(); - ++deleted; - - for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); - EB != EE; ++EB) { - Node* B = getNode((*EB)->ToolName()); - if (B == 0) - return 1; - - B->DecrInEdges(); - if (B->HasNoInEdges()) - Q.push(B); - } - } - - if (deleted != NodesMap.size()) { - errs() << "Error: there are cycles in the compilation graph!\n" - << "Try inspecting the diagram produced by " - << "'llvmc --view-graph'.\n\n"; - return 1; - } - - return 0; -} - -int CompilationGraph::Check () { - // We try to catch as many errors as we can in one go. - int errs = 0; - int ret = 0; - - // Check that output/input language names match. - ret = this->CheckLanguageNames(); - if (ret < 0) - return 1; - errs += ret; - - // Check for multiple default edges. - ret = this->CheckMultipleDefaultEdges(); - if (ret < 0) - return 1; - errs += ret; - - // Check for cycles. - ret = this->CheckCycles(); - if (ret < 0) - return 1; - errs += ret; - - return errs; -} - -// Code related to graph visualization. 
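[Editor's note, not part of the patch] The deleted CompilationGraph::CheckCycles above detects cycles by repeatedly dequeuing nodes whose remaining in-edge count has dropped to zero and then comparing how many nodes were removed against the total. A minimal standalone C++ sketch of that idea (Kahn's algorithm), independent of the llvmc node and edge types — all names and the adjacency-list representation here are illustrative assumptions, not the driver's API:

// Sketch of the cycle check used above: repeatedly remove nodes whose
// in-degree has reached zero; any node left over must sit on a cycle.
#include <cstddef>
#include <queue>
#include <vector>

static bool hasCycle(const std::vector<std::vector<int>> &Succ) {
  std::vector<int> InDegree(Succ.size(), 0);
  for (const std::vector<int> &Edges : Succ)
    for (int To : Edges)
      ++InDegree[To];

  std::queue<int> Work;
  for (std::size_t N = 0; N < Succ.size(); ++N)
    if (InDegree[N] == 0)
      Work.push(static_cast<int>(N));

  std::size_t Removed = 0;
  while (!Work.empty()) {
    int N = Work.front();
    Work.pop();
    ++Removed;
    for (int To : Succ[N])
      if (--InDegree[To] == 0)
        Work.push(To);
  }
  // Surviving nodes could never reach in-degree zero, so a cycle exists.
  return Removed != Succ.size();
}

For example, hasCycle({{1}, {2}, {0}}) returns true for the three-node loop 0 -> 1 -> 2 -> 0, while hasCycle({{1}, {2}, {}}) returns false. The deleted code differs only in that it seeds the worklist from the single "root" node rather than from every zero-in-degree node.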
- -namespace { - -std::string SquashStrArray (const char** StrArr) { - std::string ret; - - for (; *StrArr; ++StrArr) { - if (*(StrArr + 1)) { - ret += *StrArr; - ret += ", "; - } - else { - ret += *StrArr; - } - } - - return ret; -} - -} // End anonymous namespace. - -namespace llvm { - template <> - struct DOTGraphTraits<llvmc::CompilationGraph*> - : public DefaultDOTGraphTraits - { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} - - template<typename GraphType> - static std::string getNodeLabel(const Node* N, const GraphType&) - { - if (N->ToolPtr) - if (N->ToolPtr->IsJoin()) - return N->Name() + "\n (join" + - (N->HasChildren() ? ")" - : std::string(": ") + - SquashStrArray(N->ToolPtr->OutputLanguages()) + ')'); - else - return N->Name(); - else - return "root"; - } - - template<typename EdgeIter> - static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) { - if (N->ToolPtr) { - return SquashStrArray(N->ToolPtr->OutputLanguages()); - } - else { - return SquashStrArray(I->ToolPtr->InputLanguages()); - } - } - }; - -} // End namespace llvm - -int CompilationGraph::writeGraph(const std::string& OutputFilename) { - std::string ErrorInfo; - raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo); - - if (ErrorInfo.empty()) { - errs() << "Writing '"<< OutputFilename << "' file..."; - llvm::WriteGraph(O, this); - errs() << "done.\n"; - } - else { - PrintError("Error opening file '" + OutputFilename + "' for writing!"); - return 1; - } - - return 0; -} - -void CompilationGraph::viewGraph() { - llvm::ViewGraph(this, "compilation-graph"); -} diff --git a/contrib/llvm/lib/CompilerDriver/Main.cpp b/contrib/llvm/lib/CompilerDriver/Main.cpp deleted file mode 100644 index 7120027..0000000 --- a/contrib/llvm/lib/CompilerDriver/Main.cpp +++ /dev/null @@ -1,146 +0,0 @@ -//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// llvmc::Main function - driver entry point. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/AutoGenerated.h" -#include "llvm/CompilerDriver/BuiltinOptions.h" -#include "llvm/CompilerDriver/CompilationGraph.h" -#include "llvm/CompilerDriver/Error.h" - -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Path.h" - -#include <sstream> -#include <string> - -namespace cl = llvm::cl; -namespace sys = llvm::sys; -using namespace llvmc; - -namespace { - - std::stringstream* GlobalTimeLog; - - /// GetTempDir - Get the temporary directory location. Returns non-zero value - /// on error. - int GetTempDir(sys::Path& tempDir) { - // The --temp-dir option. - if (!TempDirname.empty()) { - tempDir = TempDirname; - } - // GCC 4.5-style -save-temps handling. - else if (SaveTemps == SaveTempsEnum::Unset) { - tempDir = sys::Path::GetTemporaryDirectory(); - return 0; - } - else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) { - tempDir = sys::path::parent_path(OutputFilename); - } - else { - // SaveTemps == Cwd --> use current dir (leave tempDir empty). 
- return 0; - } - - bool Exists; - if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) { - std::string ErrMsg; - if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) { - PrintError(ErrMsg); - return 1; - } - } - - return 0; - } - - /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns - /// non-zero value in case of error. - int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) { - int ret; - sys::Path tempDir; - bool toDelete = (SaveTemps == SaveTempsEnum::Unset); - - if (int ret = GetTempDir(tempDir)) - return ret; - - ret = graph.Build(tempDir, langMap); - - if (toDelete) - tempDir.eraseFromDisk(true); - - return ret; - } -} - -namespace llvmc { - -// Used to implement -time option. External linkage is intentional. -void AppendToGlobalTimeLog(const std::string& cmd, double time) { - *GlobalTimeLog << "# " << cmd << ' ' << time << '\n'; -} - -// Sometimes user code wants to access the argv[0] value. -const char* ProgramName; - -int Main(int argc, char** argv) { - int ret = 0; - LanguageMap langMap; - CompilationGraph graph; - - ProgramName = argv[0]; - - cl::ParseCommandLineOptions - (argc, argv, - /* Overview = */ "LLVM Compiler Driver (Work In Progress)", - /* ReadResponseFiles = */ false); - - if (int ret = autogenerated::RunInitialization(langMap, graph)) - return ret; - - if (CheckGraph) { - ret = graph.Check(); - if (!ret) - llvm::errs() << "check-graph: no errors found.\n"; - - return ret; - } - - if (ViewGraph) { - graph.viewGraph(); - if (!WriteGraph) - return 0; - } - - if (WriteGraph) { - const std::string& Out = (OutputFilename.empty() - ? std::string("compilation-graph.dot") - : OutputFilename); - return graph.writeGraph(Out); - } - - if (Time) { - GlobalTimeLog = new std::stringstream; - GlobalTimeLog->precision(2); - } - - ret = BuildTargets(graph, langMap); - - if (Time) { - llvm::errs() << GlobalTimeLog->str(); - delete GlobalTimeLog; - } - - return ret; -} - -} // end namespace llvmc diff --git a/contrib/llvm/lib/CompilerDriver/Tool.cpp b/contrib/llvm/lib/CompilerDriver/Tool.cpp deleted file mode 100644 index 876759a..0000000 --- a/contrib/llvm/lib/CompilerDriver/Tool.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Tool base class - implementation details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/BuiltinOptions.h" -#include "llvm/CompilerDriver/Tool.h" - -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Path.h" - -#include <algorithm> - -using namespace llvm; -using namespace llvmc; - -namespace { - sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName, - const std::string& Suffix) { - sys::Path Out; - - // Make sure we don't end up with path names like '/file.o' if the - // TempDir is empty. - if (TempDir.empty()) { - Out.set(BaseName); - } - else { - Out = TempDir; - Out.appendComponent(BaseName); - } - Out.appendSuffix(Suffix); - // NOTE: makeUnique always *creates* a unique temporary file, - // which is good, since there will be no races. However, some - // tools do not like it when the output file already exists, so - // they need to be placated with -f or something like that. 
- Out.makeUnique(true, NULL); - return Out; - } -} - -sys::Path Tool::OutFilename(const sys::Path& In, - const sys::Path& TempDir, - bool StopCompilation, - const char* OutputSuffix) const { - sys::Path Out; - - if (StopCompilation) { - if (!OutputFilename.empty()) { - Out.set(OutputFilename); - } - else if (IsJoin()) { - Out.set("a"); - Out.appendSuffix(OutputSuffix); - } - else { - Out.set(sys::path::stem(In.str())); - Out.appendSuffix(OutputSuffix); - } - } - else { - if (IsJoin()) - Out = MakeTempFile(TempDir, "tmp", OutputSuffix); - else - Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix); - } - return Out; -} - -namespace { - template <class A, class B> - bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) { - return std::less<A>()(p1.first, p2.first); - } -} - -StrVector Tool::SortArgs(ArgsVector& Args) const { - StrVector Out; - - // HACK: this won't be needed when we'll migrate away from CommandLine. - std::stable_sort(Args.begin(), Args.end(), - &CompareFirst<unsigned, std::string>); - for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { - Out.push_back(B->second); - } - - return Out; -} diff --git a/contrib/llvm/lib/DebugInfo/DIContext.cpp b/contrib/llvm/lib/DebugInfo/DIContext.cpp new file mode 100644 index 0000000..e2fd55f --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DIContext.cpp @@ -0,0 +1,24 @@ +//===-- DIContext.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DIContext.h" +#include "DWARFContext.h" +using namespace llvm; + +DIContext::~DIContext() {} + +DIContext *DIContext::getDWARFContext(bool isLittleEndian, + StringRef infoSection, + StringRef abbrevSection, + StringRef aRangeSection, + StringRef lineSection, + StringRef stringSection) { + return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection, + aRangeSection, lineSection, stringSection); +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp new file mode 100644 index 0000000..0df692c --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp @@ -0,0 +1,83 @@ +//===-- DWARFAbbreviationDeclaration.cpp ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFAbbreviationDeclaration.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace dwarf; + +bool +DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr){ + return extract(data, offset_ptr, data.getULEB128(offset_ptr)); +} + +bool +DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr, + uint32_t code) { + Code = code; + Attributes.clear(); + if (Code) { + Tag = data.getULEB128(offset_ptr); + HasChildren = data.getU8(offset_ptr); + + while (data.isValidOffset(*offset_ptr)) { + uint16_t attr = data.getULEB128(offset_ptr); + uint16_t form = data.getULEB128(offset_ptr); + + if (attr && form) + Attributes.push_back(DWARFAttribute(attr, form)); + else + break; + } + + return Tag != 0; + } else { + Tag = 0; + HasChildren = false; + } + + return false; +} + +void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { + const char *tagString = TagString(getTag()); + OS << '[' << getCode() << "] "; + if (tagString) + OS << tagString; + else + OS << format("DW_TAG_Unknown_%x", getTag()); + OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n'; + for (unsigned i = 0, e = Attributes.size(); i != e; ++i) { + OS << '\t'; + const char *attrString = AttributeString(Attributes[i].getAttribute()); + if (attrString) + OS << attrString; + else + OS << format("DW_AT_Unknown_%x", Attributes[i].getAttribute()); + OS << '\t'; + const char *formString = FormEncodingString(Attributes[i].getForm()); + if (formString) + OS << formString; + else + OS << format("DW_FORM_Unknown_%x", Attributes[i].getForm()); + OS << '\n'; + } + OS << '\n'; +} + +uint32_t +DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const { + for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) { + if (Attributes[i].getAttribute() == attr) + return i; + } + return -1U; +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h new file mode 100644 index 0000000..2463a3c --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h @@ -0,0 +1,54 @@ +//===-- DWARFAbbreviationDeclaration.h --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H +#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H + +#include "DWARFAttribute.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { + +class raw_ostream; + +class DWARFAbbreviationDeclaration { + uint32_t Code; + uint32_t Tag; + bool HasChildren; + SmallVector<DWARFAttribute, 8> Attributes; +public: + enum { InvalidCode = 0 }; + DWARFAbbreviationDeclaration() + : Code(InvalidCode), Tag(0), HasChildren(0) {} + + uint32_t getCode() const { return Code; } + uint32_t getTag() const { return Tag; } + bool hasChildren() const { return HasChildren; } + uint32_t getNumAttributes() const { return Attributes.size(); } + uint16_t getAttrByIndex(uint32_t idx) const { + return Attributes.size() > idx ? Attributes[idx].getAttribute() : 0; + } + uint16_t getFormByIndex(uint32_t idx) const { + return Attributes.size() > idx ? 
Attributes[idx].getForm() : 0; + } + + uint32_t findAttributeIndex(uint16_t attr) const; + bool extract(DataExtractor data, uint32_t* offset_ptr); + bool extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code); + bool isValid() const { return Code != 0 && Tag != 0; } + void dump(raw_ostream &OS) const; + const SmallVectorImpl<DWARFAttribute> &getAttributes() const { + return Attributes; + } +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFAttribute.h b/contrib/llvm/lib/DebugInfo/DWARFAttribute.h new file mode 100644 index 0000000..6f49b63 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFAttribute.h @@ -0,0 +1,30 @@ +//===-- DWARFAttribute.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H +#define LLVM_DEBUGINFO_DWARFATTRIBUTE_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class DWARFAttribute { + uint16_t Attribute; + uint16_t Form; + public: + DWARFAttribute(uint16_t attr, uint16_t form) + : Attribute(attr), Form(form) {} + + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp new file mode 100644 index 0000000..24bf97f --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp @@ -0,0 +1,238 @@ +//===-- DWARFCompileUnit.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFCompileUnit.h" +#include "DWARFContext.h" +#include "DWARFFormValue.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace dwarf; + +DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const { + return DataExtractor(Context.getInfoSection(), + Context.isLittleEndian(), getAddressByteSize()); +} + +bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { + clear(); + + Offset = *offset_ptr; + + if (debug_info.isValidOffset(*offset_ptr)) { + uint64_t abbrOffset; + const DWARFDebugAbbrev *abbr = Context.getDebugAbbrev(); + Length = debug_info.getU32(offset_ptr); + Version = debug_info.getU16(offset_ptr); + abbrOffset = debug_info.getU32(offset_ptr); + AddrSize = debug_info.getU8(offset_ptr); + + bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1); + bool versionOK = DWARFContext::isSupportedVersion(Version); + bool abbrOffsetOK = Context.getAbbrevSection().size() > abbrOffset; + bool addrSizeOK = AddrSize == 4 || AddrSize == 8; + + if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && abbr != NULL) { + Abbrevs = abbr->getAbbreviationDeclarationSet(abbrOffset); + return true; + } + + // reset the offset to where we tried to parse from if anything went wrong + *offset_ptr = Offset; + } + + return false; +} + +uint32_t +DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, + const DWARFAbbreviationDeclarationSet *abbrevs) { + clear(); + + Offset = offset; + + if (debug_info_data.isValidOffset(offset)) { + Length = debug_info_data.getU32(&offset); + Version = debug_info_data.getU16(&offset); + bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset(); + Abbrevs = abbrevs; + AddrSize = debug_info_data.getU8 (&offset); + + bool versionOK = DWARFContext::isSupportedVersion(Version); + bool addrSizeOK = AddrSize == 4 || AddrSize == 8; + + if (versionOK && addrSizeOK && abbrevsOK && + debug_info_data.isValidOffset(offset)) + return offset; + } + return 0; +} + +void DWARFCompileUnit::clear() { + Offset = 0; + Length = 0; + Version = 0; + Abbrevs = 0; + AddrSize = 0; + BaseAddr = 0; + DieArray.clear(); +} + +void DWARFCompileUnit::dump(raw_ostream &OS) { + OS << format("0x%08x", Offset) << ": Compile Unit:" + << " length = " << format("0x%08x", Length) + << " version = " << format("0x%04x", Version) + << " abbr_offset = " << format("0x%04x", Abbrevs->getOffset()) + << " addr_size = " << format("0x%02x", AddrSize) + << " (next CU at " << format("0x%08x", getNextCompileUnitOffset()) + << ")\n"; + + getCompileUnitDIE(false)->dump(OS, this, -1U); +} + +void DWARFCompileUnit::setDIERelations() { + if (DieArray.empty()) + return; + DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front(); + DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back(); + DWARFDebugInfoEntryMinimal *curr_die; + // We purposely are skipping the last element in the array in the loop below + // so that we can always have a valid next item + for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) { + // Since our loop doesn't include the last element, we can always + // safely access the next die in the array. 
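For example, if a unit's flat array holds [0] DW_TAG_compile_unit (has children), [1] DW_TAG_subprogram, [2] DW_TAG_subprogram and [3] a NULL terminator, the loop below records [0] as the parent of [1] (ParentIdx 1), chains [1] -> [2] -> [3] through SiblingIdx, and setSibling() propagates [0] as the parent of [2] and [3] as well.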
+ DWARFDebugInfoEntryMinimal *next_die = curr_die + 1; + + const DWARFAbbreviationDeclaration *curr_die_abbrev = + curr_die->getAbbreviationDeclarationPtr(); + + if (curr_die_abbrev) { + // Normal DIE + if (curr_die_abbrev->hasChildren()) + next_die->setParent(curr_die); + else + curr_die->setSibling(next_die); + } else { + // NULL DIE that terminates a sibling chain + DWARFDebugInfoEntryMinimal *parent = curr_die->getParent(); + if (parent) + parent->setSibling(next_die); + } + } + + // Since we skipped the last element, we need to fix it up! + if (die_array_begin < die_array_end) + curr_die->setParent(die_array_begin); +} + +size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { + const size_t initial_die_array_size = DieArray.size(); + if ((cu_die_only && initial_die_array_size > 0) || + initial_die_array_size > 1) + return 0; // Already parsed + + // Set the offset to that of the first DIE and calculate the start of the + // next compilation unit header. + uint32_t offset = getFirstDIEOffset(); + uint32_t next_cu_offset = getNextCompileUnitOffset(); + + DWARFDebugInfoEntryMinimal die; + // Keep a flat array of the DIE for binary lookup by DIE offset + uint32_t depth = 0; + // We are in our compile unit, parse starting at the offset + // we were told to parse + + const uint8_t *fixed_form_sizes = + DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize()); + + while (offset < next_cu_offset && + die.extractFast(this, fixed_form_sizes, &offset)) { + + if (depth == 0) { + uint64_t base_addr = + die.getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U); + if (base_addr == -1U) + base_addr = die.getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0); + setBaseAddress(base_addr); + } + + if (cu_die_only) { + addDIE(die); + return 1; + } + else if (depth == 0 && initial_die_array_size == 1) { + // Don't append the CU die as we already did that + } else { + addDIE (die); + } + + const DWARFAbbreviationDeclaration *abbrDecl = + die.getAbbreviationDeclarationPtr(); + if (abbrDecl) { + // Normal DIE + if (abbrDecl->hasChildren()) + ++depth; + } else { + // NULL DIE. + if (depth > 0) + --depth; + if (depth == 0) + break; // We are done with this compile unit! + } + + } + + // Give a little bit of info if we encounter corrupt DWARF (our offset + // should always terminate at or before the start of the next compilation + // unit header). + if (offset > next_cu_offset) { + fprintf (stderr, "warning: DWARF compile unit extends beyond its bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); + } + + setDIERelations(); + return DieArray.size(); +} + +void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) { + if (DieArray.size() > 1) { + // std::vectors never get any smaller when resized to a smaller size, + // or when clear() or erase() are called, the size will report that it + // is smaller, but the memory allocated remains intact (call capacity() + // to see this). So we need to create a temporary vector and swap the + // contents which will cause just the internal pointers to be swapped + // so that when "tmp_array" goes out of scope, it will destroy the + // contents. 
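The comment above relies on a standard-library property worth spelling out: clearing or shrinking a std::vector does not give its allocation back. A minimal standalone sketch of the same swap idiom that clearDIEs() uses below (illustrative only, not part of the patch):

#include <cassert>
#include <vector>

// Swap v with a right-sized temporary copy; the temporary inherits the
// oversized buffer and frees it when it goes out of scope.
static void releaseExcessCapacity(std::vector<int> &v) {
  std::vector<int> tmp(v.begin(), v.end());
  tmp.swap(v);
}

int main() {
  std::vector<int> v(1024, 7);
  v.resize(1);                 // size() is now 1; capacity() typically is not
  releaseExcessCapacity(v);
  assert(v.size() == 1 && v[0] == 7);
  return 0;
}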
+ + // Save at least the compile unit DIE + std::vector<DWARFDebugInfoEntryMinimal> tmpArray; + DieArray.swap(tmpArray); + if (keep_compile_unit_die) + DieArray.push_back(tmpArray.front()); + } +} + +void +DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, + bool clear_dies_if_already_not_parsed){ + // This function is usually called if there in no .debug_aranges section + // in order to produce a compile unit level set of address ranges that + // is accurate. If the DIEs weren't parsed, then we don't want all dies for + // all compile units to stay loaded when they weren't needed. So we can end + // up parsing the DWARF and then throwing them all away to keep memory usage + // down. + const bool clear_dies = extractDIEsIfNeeded(false) > 1; + + DieArray[0].buildAddressRangeTable(this, debug_aranges); + + // Keep memory down by clearing DIEs if this generate function + // caused them to be parsed. + if (clear_dies) + clearDIEs(true); +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h new file mode 100644 index 0000000..d916729 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h @@ -0,0 +1,111 @@ +//===-- DWARFCompileUnit.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H +#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H + +#include "DWARFDebugAbbrev.h" +#include "DWARFDebugInfoEntry.h" +#include <vector> + +namespace llvm { + +class DWARFContext; +class raw_ostream; + +class DWARFCompileUnit { + DWARFContext &Context; + + uint32_t Offset; + uint32_t Length; + uint16_t Version; + const DWARFAbbreviationDeclarationSet *Abbrevs; + uint8_t AddrSize; + uint64_t BaseAddr; + // The compile unit debug information entry item. + std::vector<DWARFDebugInfoEntryMinimal> DieArray; +public: + DWARFCompileUnit(DWARFContext &context) : Context(context) { + clear(); + } + + DWARFContext &getContext() const { return Context; } + DataExtractor getDebugInfoExtractor() const; + + bool extract(DataExtractor debug_info, uint32_t* offset_ptr); + uint32_t extract(uint32_t offset, DataExtractor debug_info_data, + const DWARFAbbreviationDeclarationSet *abbrevs); + + /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it + /// hasn't already been done. + size_t extractDIEsIfNeeded(bool cu_die_only); + void clear(); + void dump(raw_ostream &OS); + uint32_t getOffset() const { return Offset; } + /// Size in bytes of the compile unit header. + uint32_t getSize() const { return 11; } + bool containsDIEOffset(uint32_t die_offset) const { + return die_offset >= getFirstDIEOffset() && + die_offset < getNextCompileUnitOffset(); + } + uint32_t getFirstDIEOffset() const { return Offset + getSize(); } + uint32_t getNextCompileUnitOffset() const { return Offset + Length + 4; } + /// Size in bytes of the .debug_info data associated with this compile unit. 
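For example, a unit whose header starts at .debug_info offset 0x0 with a length field of 0x52 has an 11-byte header (getSize(): 4-byte length, 2-byte version, 4-byte abbreviation offset, 1-byte address size), so its first DIE starts at 0xb, the next unit header starts at 0x0 + 0x52 + 4 = 0x56, and the member below reports 0x52 + 4 - 11 = 0x4b bytes of DIE data.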
+ size_t getDebugInfoSize() const { return Length + 4 - getSize(); } + uint32_t getLength() const { return Length; } + uint16_t getVersion() const { return Version; } + const DWARFAbbreviationDeclarationSet *getAbbreviations() const { + return Abbrevs; + } + uint8_t getAddressByteSize() const { return AddrSize; } + uint64_t getBaseAddress() const { return BaseAddr; } + + void setBaseAddress(uint64_t base_addr) { + BaseAddr = base_addr; + } + + const DWARFDebugInfoEntryMinimal * + getCompileUnitDIE(bool extract_cu_die_only = true) { + extractDIEsIfNeeded(extract_cu_die_only); + if (DieArray.empty()) + return NULL; + return &DieArray[0]; + } + + /// setDIERelations - We read in all of the DIE entries into our flat list + /// of DIE entries and now we need to go back through all of them and set the + /// parent, sibling and child pointers for quick DIE navigation. + void setDIERelations(); + + void addDIE(DWARFDebugInfoEntryMinimal &die) { + // The average bytes per DIE entry has been seen to be + // around 14-20 so lets pre-reserve the needed memory for + // our DIE entries accordingly. Search forward for "Compute + // average bytes per DIE" to see #if'ed out code that does + // that determination. + + // Only reserve the memory if we are adding children of + // the main compile unit DIE. The compile unit DIE is always + // the first entry, so if our size is 1, then we are adding + // the first compile unit child DIE and should reserve + // the memory. + if (DieArray.empty()) + DieArray.reserve(getDebugInfoSize() / 14); + DieArray.push_back(die); + } + + void clearDIEs(bool keep_compile_unit_die); + + void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, + bool clear_dies_if_already_not_parsed); +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp new file mode 100644 index 0000000..e1ac398 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp @@ -0,0 +1,167 @@ +//===-- DWARFContext.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFContext.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; +using namespace dwarf; + +void DWARFContext::dump(raw_ostream &OS) { + OS << ".debug_abbrev contents:\n"; + getDebugAbbrev()->dump(OS); + + OS << "\n.debug_info contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) + getCompileUnitAtIndex(i)->dump(OS); + + OS << "\n.debug_aranges contents:\n"; + DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); + uint32_t offset = 0; + DWARFDebugArangeSet set; + while (set.extract(arangesData, &offset)) + set.dump(OS); + + OS << "\n.debug_lines contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { + DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + unsigned stmtOffset = + cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, + -1U); + if (stmtOffset != -1U) { + DataExtractor lineData(getLineSection(), isLittleEndian(), + cu->getAddressByteSize()); + DWARFDebugLine::DumpingState state(OS); + DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); + } + } + + OS << "\n.debug_str contents:\n"; + DataExtractor strData(getStringSection(), isLittleEndian(), 0); + offset = 0; + uint32_t lastOffset = 0; + while (const char *s = strData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", lastOffset, s); + lastOffset = offset; + } +} + +const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { + if (Abbrev) + return Abbrev.get(); + + DataExtractor abbrData(getAbbrevSection(), isLittleEndian(), 0); + + Abbrev.reset(new DWARFDebugAbbrev()); + Abbrev->parse(abbrData); + return Abbrev.get(); +} + +const DWARFDebugAranges *DWARFContext::getDebugAranges() { + if (Aranges) + return Aranges.get(); + + DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); + + Aranges.reset(new DWARFDebugAranges()); + Aranges->extract(arangesData); + if (Aranges->isEmpty()) // No aranges in file, generate them from the DIEs. + Aranges->generate(this); + return Aranges.get(); +} + +const DWARFDebugLine::LineTable * +DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { + if (!Line) + Line.reset(new DWARFDebugLine()); + + unsigned stmtOffset = + cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, + -1U); + if (stmtOffset == -1U) + return 0; // No line table for this compile unit. + + // See if the line table is cached. + if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset)) + return lt; + + // We have to parse it first. 
+ DataExtractor lineData(getLineSection(), isLittleEndian(), + cu->getAddressByteSize()); + return Line->getOrParseLineTable(lineData, stmtOffset); +} + +void DWARFContext::parseCompileUnits() { + uint32_t offset = 0; + const DataExtractor &debug_info_data = DataExtractor(getInfoSection(), + isLittleEndian(), 0); + while (debug_info_data.isValidOffset(offset)) { + CUs.push_back(DWARFCompileUnit(*this)); + if (!CUs.back().extract(debug_info_data, &offset)) { + CUs.pop_back(); + break; + } + + offset = CUs.back().getNextCompileUnitOffset(); + } +} + +namespace { + struct OffsetComparator { + bool operator()(const DWARFCompileUnit &LHS, + const DWARFCompileUnit &RHS) const { + return LHS.getOffset() < RHS.getOffset(); + } + bool operator()(const DWARFCompileUnit &LHS, uint32_t RHS) const { + return LHS.getOffset() < RHS; + } + bool operator()(uint32_t LHS, const DWARFCompileUnit &RHS) const { + return LHS < RHS.getOffset(); + } + }; +} + +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { + if (CUs.empty()) + parseCompileUnits(); + + DWARFCompileUnit *i = std::lower_bound(CUs.begin(), CUs.end(), offset, + OffsetComparator()); + if (i != CUs.end()) + return &*i; + return 0; +} + +DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) { + // First, get the offset of the compile unit. + uint32_t cuOffset = getDebugAranges()->findAddress(address); + // Retrieve the compile unit. + DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); + if (!cu) + return DILineInfo("<invalid>", 0, 0); + // Get the line table for this compile unit. + const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); + if (!lineTable) + return DILineInfo("<invalid>", 0, 0); + // Get the index of the row we're looking for in the line table. + uint64_t hiPC = + cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_high_pc, + -1ULL); + uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); + if (rowIndex == -1U) + return DILineInfo("<invalid>", 0, 0); + + // From here, contruct the DILineInfo. + const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; + const std::string &fileName = lineTable->Prologue.FileNames[row.File-1].Name; + + return DILineInfo(fileName.c_str(), row.Line, row.Column); +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h new file mode 100644 index 0000000..746a463 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h @@ -0,0 +1,118 @@ +//===-- DWARFContext.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===/ + +#ifndef LLVM_DEBUGINFO_DWARFCONTEXT_H +#define LLVM_DEBUGINFO_DWARFCONTEXT_H + +#include "DWARFCompileUnit.h" +#include "DWARFDebugAranges.h" +#include "DWARFDebugLine.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// DWARFContext +/// This data structure is the top level entity that deals with dwarf debug +/// information parsing. The actual data is supplied through pure virtual +/// methods that a concrete implementation provides. 
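A hypothetical caller of the class defined below, using the in-memory implementation declared at the end of this header; the section buffers and the address are placeholders, and how the sections are read out of an object file is outside the scope of this sketch:

#include "DWARFContext.h"              // this header, within lib/DebugInfo
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"

// The StringRefs are assumed to already hold the raw bytes of the
// corresponding .debug_* sections.
void printDebugInfo(llvm::StringRef info, llvm::StringRef abbrev,
                    llvm::StringRef aranges, llvm::StringRef line,
                    llvm::StringRef str, uint64_t pc) {
  llvm::DWARFContextInMemory ctx(/*isLittleEndian=*/true,
                                 info, abbrev, aranges, line, str);
  ctx.dump(llvm::outs());                        // dump all parsed sections
  llvm::DILineInfo li = ctx.getLineInfoForAddress(pc);
  (void)li;                                      // file/line/column for pc
}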
+class DWARFContext : public DIContext { + bool IsLittleEndian; + + SmallVector<DWARFCompileUnit, 1> CUs; + OwningPtr<DWARFDebugAbbrev> Abbrev; + OwningPtr<DWARFDebugAranges> Aranges; + OwningPtr<DWARFDebugLine> Line; + + DWARFContext(DWARFContext &); // = delete + DWARFContext &operator=(DWARFContext &); // = delete + + /// Read compile units from the debug_info section and store them in CUs. + void parseCompileUnits(); +protected: + DWARFContext(bool isLittleEndian) : IsLittleEndian(isLittleEndian) {} +public: + virtual void dump(raw_ostream &OS); + /// Get the number of compile units in this context. + unsigned getNumCompileUnits() { + if (CUs.empty()) + parseCompileUnits(); + return CUs.size(); + } + /// Get the compile unit at the specified index for this compile unit. + DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) { + if (CUs.empty()) + parseCompileUnits(); + return &CUs[index]; + } + + /// Return the compile unit that includes an offset (relative to .debug_info). + DWARFCompileUnit *getCompileUnitForOffset(uint32_t offset); + + /// Get a pointer to the parsed DebugAbbrev object. + const DWARFDebugAbbrev *getDebugAbbrev(); + + /// Get a pointer to the parsed DebugAranges object. + const DWARFDebugAranges *getDebugAranges(); + + /// Get a pointer to a parsed line table corresponding to a compile unit. + const DWARFDebugLine::LineTable * + getLineTableForCompileUnit(DWARFCompileUnit *cu); + + virtual DILineInfo getLineInfoForAddress(uint64_t address); + + bool isLittleEndian() const { return IsLittleEndian; } + + virtual StringRef getInfoSection() = 0; + virtual StringRef getAbbrevSection() = 0; + virtual StringRef getARangeSection() = 0; + virtual StringRef getLineSection() = 0; + virtual StringRef getStringSection() = 0; + + static bool isSupportedVersion(unsigned version) { + return version == 2 || version == 3; + } +}; + + +/// DWARFContextInMemory is the simplest possible implementation of a +/// DWARFContext. It assumes all content is available in memory and stores +/// pointers to it. +class DWARFContextInMemory : public DWARFContext { + StringRef InfoSection; + StringRef AbbrevSection; + StringRef ARangeSection; + StringRef LineSection; + StringRef StringSection; +public: + DWARFContextInMemory(bool isLittleEndian, + StringRef infoSection, + StringRef abbrevSection, + StringRef aRangeSection, + StringRef lineSection, + StringRef stringSection) + : DWARFContext(isLittleEndian), + InfoSection(infoSection), + AbbrevSection(abbrevSection), + ARangeSection(aRangeSection), + LineSection(lineSection), + StringSection(stringSection) + {} + + virtual StringRef getInfoSection() { return InfoSection; } + virtual StringRef getAbbrevSection() { return AbbrevSection; } + virtual StringRef getARangeSection() { return ARangeSection; } + virtual StringRef getLineSection() { return LineSection; } + virtual StringRef getStringSection() { return StringSection; } +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp new file mode 100644 index 0000000..a11ae3f --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp @@ -0,0 +1,106 @@ +//===-- DWARFDebugAbbrev.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugAbbrev.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool DWARFAbbreviationDeclarationSet::extract(DataExtractor data, + uint32_t* offset_ptr) { + const uint32_t beginOffset = *offset_ptr; + Offset = beginOffset; + clear(); + DWARFAbbreviationDeclaration abbrevDeclaration; + uint32_t prevAbbrAode = 0; + while (abbrevDeclaration.extract(data, offset_ptr)) { + Decls.push_back(abbrevDeclaration); + if (IdxOffset == 0) { + IdxOffset = abbrevDeclaration.getCode(); + } else { + if (prevAbbrAode + 1 != abbrevDeclaration.getCode()) + IdxOffset = UINT32_MAX;// Out of order indexes, we can't do O(1) lookups + } + prevAbbrAode = abbrevDeclaration.getCode(); + } + return beginOffset != *offset_ptr; +} + +void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const { + for (unsigned i = 0, e = Decls.size(); i != e; ++i) + Decls[i].dump(OS); +} + +const DWARFAbbreviationDeclaration* +DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(uint32_t abbrCode) + const { + if (IdxOffset == UINT32_MAX) { + DWARFAbbreviationDeclarationCollConstIter pos; + DWARFAbbreviationDeclarationCollConstIter end = Decls.end(); + for (pos = Decls.begin(); pos != end; ++pos) { + if (pos->getCode() == abbrCode) + return &(*pos); + } + } else { + uint32_t idx = abbrCode - IdxOffset; + if (idx < Decls.size()) + return &Decls[idx]; + } + return NULL; +} + +DWARFDebugAbbrev::DWARFDebugAbbrev() : + AbbrevCollMap(), + PrevAbbrOffsetPos(AbbrevCollMap.end()) {} + + +void DWARFDebugAbbrev::parse(DataExtractor data) { + uint32_t offset = 0; + + while (data.isValidOffset(offset)) { + uint32_t initial_cu_offset = offset; + DWARFAbbreviationDeclarationSet abbrevDeclSet; + + if (abbrevDeclSet.extract(data, &offset)) + AbbrevCollMap[initial_cu_offset] = abbrevDeclSet; + else + break; + } + PrevAbbrOffsetPos = AbbrevCollMap.end(); +} + +void DWARFDebugAbbrev::dump(raw_ostream &OS) const { + if (AbbrevCollMap.empty()) { + OS << "< EMPTY >\n"; + return; + } + + DWARFAbbreviationDeclarationCollMapConstIter pos; + for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) { + OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first); + pos->second.dump(OS); + } +} + +const DWARFAbbreviationDeclarationSet* +DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const { + DWARFAbbreviationDeclarationCollMapConstIter end = AbbrevCollMap.end(); + DWARFAbbreviationDeclarationCollMapConstIter pos; + if (PrevAbbrOffsetPos != end && + PrevAbbrOffsetPos->first == cu_abbr_offset) { + return &(PrevAbbrOffsetPos->second); + } else { + pos = AbbrevCollMap.find(cu_abbr_offset); + PrevAbbrOffsetPos = pos; + } + + if (pos != AbbrevCollMap.end()) + return &(pos->second); + return NULL; +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h new file mode 100644 index 0000000..03189b1 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h @@ -0,0 +1,73 @@ +//===-- DWARFDebugAbbrev.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
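A concrete instance of the lookup strategy implemented above: a table that declares abbreviation codes 1, 2, 3 in order keeps IdxOffset == 1, so getAbbreviationDeclaration(2) returns Decls[2 - 1] directly; a table declaring codes 1 and 3 (a gap) sets IdxOffset to UINT32_MAX, and the same call falls back to the linear scan over Decls.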
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGABBREV_H +#define LLVM_DEBUGINFO_DWARFDEBUGABBREV_H + +#include "DWARFAbbreviationDeclaration.h" +#include <list> +#include <map> +#include <vector> + +namespace llvm { + +typedef std::vector<DWARFAbbreviationDeclaration> + DWARFAbbreviationDeclarationColl; +typedef DWARFAbbreviationDeclarationColl::iterator + DWARFAbbreviationDeclarationCollIter; +typedef DWARFAbbreviationDeclarationColl::const_iterator + DWARFAbbreviationDeclarationCollConstIter; + +class DWARFAbbreviationDeclarationSet { + uint64_t Offset; + uint32_t IdxOffset; + std::vector<DWARFAbbreviationDeclaration> Decls; + public: + DWARFAbbreviationDeclarationSet() + : Offset(0), IdxOffset(0) {} + + DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset) + : Offset(offset), IdxOffset(idxOffset) {} + + void clear() { + IdxOffset = 0; + Decls.clear(); + } + uint64_t getOffset() const { return Offset; } + void dump(raw_ostream &OS) const; + bool extract(DataExtractor data, uint32_t* offset_ptr); + + const DWARFAbbreviationDeclaration * + getAbbreviationDeclaration(uint32_t abbrCode) const; +}; + +class DWARFDebugAbbrev { +public: + typedef std::map<uint64_t, DWARFAbbreviationDeclarationSet> + DWARFAbbreviationDeclarationCollMap; + typedef DWARFAbbreviationDeclarationCollMap::iterator + DWARFAbbreviationDeclarationCollMapIter; + typedef DWARFAbbreviationDeclarationCollMap::const_iterator + DWARFAbbreviationDeclarationCollMapConstIter; + +private: + DWARFAbbreviationDeclarationCollMap AbbrevCollMap; + mutable DWARFAbbreviationDeclarationCollMapConstIter PrevAbbrOffsetPos; + +public: + DWARFDebugAbbrev(); + const DWARFAbbreviationDeclarationSet * + getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const; + void dump(raw_ostream &OS) const; + void parse(DataExtractor data); +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp new file mode 100644 index 0000000..b0c0354 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -0,0 +1,150 @@ +//===-- DWARFDebugArangeSet.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugArangeSet.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +using namespace llvm; + +void DWARFDebugArangeSet::clear() { + Offset = -1U; + std::memset(&Header, 0, sizeof(Header)); + ArangeDescriptors.clear(); +} + +void DWARFDebugArangeSet::compact() { + if (ArangeDescriptors.empty()) + return; + + // Iterate through all arange descriptors and combine any ranges that + // overlap or have matching boundaries. The ArangeDescriptors are assumed + // to be in ascending order. + uint32_t i = 0; + while (i + 1 < ArangeDescriptors.size()) { + if (ArangeDescriptors[i].getEndAddress() >= ArangeDescriptors[i+1].Address){ + // The current range ends at or exceeds the start of the next address + // range. Compute the max end address between the two and use that to + // make the new length. 
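For instance, descriptors [0x1000, 0x1020) and [0x1010, 0x1050) overlap, so the first entry's Length becomes max(0x1020, 0x1050) - 0x1000 = 0x50 and the second entry is erased.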
+ const uint64_t max_end_addr = + std::max(ArangeDescriptors[i].getEndAddress(), + ArangeDescriptors[i+1].getEndAddress()); + ArangeDescriptors[i].Length = max_end_addr - ArangeDescriptors[i].Address; + // Now remove the next entry as it was just combined with the previous one + ArangeDescriptors.erase(ArangeDescriptors.begin()+i+1); + } else { + // Discontiguous address range, just proceed to the next one. + ++i; + } + } +} + +bool +DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { + if (data.isValidOffset(*offset_ptr)) { + ArangeDescriptors.clear(); + Offset = *offset_ptr; + + // 7.20 Address Range Table + // + // Each set of entries in the table of address ranges contained in + // the .debug_aranges section begins with a header consisting of: a + // 4-byte length containing the length of the set of entries for this + // compilation unit, not including the length field itself; a 2-byte + // version identifier containing the value 2 for DWARF Version 2; a + // 4-byte offset into the.debug_infosection; a 1-byte unsigned integer + // containing the size in bytes of an address (or the offset portion of + // an address for segmented addressing) on the target system; and a + // 1-byte unsigned integer containing the size in bytes of a segment + // descriptor on the target system. This header is followed by a series + // of tuples. Each tuple consists of an address and a length, each in + // the size appropriate for an address on the target architecture. + Header.Length = data.getU32(offset_ptr); + Header.Version = data.getU16(offset_ptr); + Header.CuOffset = data.getU32(offset_ptr); + Header.AddrSize = data.getU8(offset_ptr); + Header.SegSize = data.getU8(offset_ptr); + + // Perform basic validation of the header fields. + if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) || + (Header.AddrSize != 4 && Header.AddrSize != 8)) { + clear(); + return false; + } + + // The first tuple following the header in each set begins at an offset + // that is a multiple of the size of a single tuple (that is, twice the + // size of an address). The header is padded, if necessary, to the + // appropriate boundary. + const uint32_t header_size = *offset_ptr - Offset; + const uint32_t tuple_size = Header.AddrSize * 2; + uint32_t first_tuple_offset = 0; + while (first_tuple_offset < header_size) + first_tuple_offset += tuple_size; + + *offset_ptr = Offset + first_tuple_offset; + + Descriptor arangeDescriptor; + + assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length)); + assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize); + + while (data.isValidOffset(*offset_ptr)) { + arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize); + arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize); + + // Each set of tuples is terminated by a 0 for the address and 0 + // for the length. 
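For an 8-byte address size, for example, the header above occupies 4 + 2 + 4 + 1 + 1 = 12 bytes, the first (address, length) pair is then aligned to the next multiple of the 16-byte tuple size (4 bytes of padding), and the set ends with the all-zero tuple that the loop below breaks on.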
+ if (arangeDescriptor.Address || arangeDescriptor.Length) + ArangeDescriptors.push_back(arangeDescriptor); + else + break; // We are done if we get a zero address and length + } + + return !ArangeDescriptors.empty(); + } + return false; +} + +void DWARFDebugArangeSet::dump(raw_ostream &OS) const { + OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ", + Header.Length, Header.Version) + << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", + Header.CuOffset, Header.AddrSize, Header.SegSize); + + const uint32_t hex_width = Header.AddrSize * 2; + for (DescriptorConstIter pos = ArangeDescriptors.begin(), + end = ArangeDescriptors.end(); pos != end; ++pos) + OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address) + << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress()); +} + + +namespace { + class DescriptorContainsAddress { + const uint64_t Address; + public: + DescriptorContainsAddress(uint64_t address) : Address(address) {} + bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const { + return Address >= desc.Address && Address < (desc.Address + desc.Length); + } + }; +} + +uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const { + DescriptorConstIter end = ArangeDescriptors.end(); + DescriptorConstIter pos = + std::find_if(ArangeDescriptors.begin(), end, // Range + DescriptorContainsAddress(address)); // Predicate + if (pos != end) + return Header.CuOffset; + + return -1U; +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h new file mode 100644 index 0000000..9a2a6d0 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h @@ -0,0 +1,75 @@ +//===-- DWARFDebugArangeSet.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H +#define LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H + +#include "llvm/Support/DataExtractor.h" +#include <vector> + +namespace llvm { + +class raw_ostream; + +class DWARFDebugArangeSet { +public: + struct Header { + // The total length of the entries for that set, not including the length + // field itself. + uint32_t Length; + // The offset from the beginning of the .debug_info section of the + // compilation unit entry referenced by the table. + uint32_t CuOffset; + // The DWARF version number. + uint16_t Version; + // The size in bytes of an address on the target architecture. For segmented + // addressing, this is the size of the offset portion of the address. + uint8_t AddrSize; + // The size in bytes of a segment descriptor on the target architecture. + // If the target system uses a flat address space, this value is 0. 
+ uint8_t SegSize; + }; + + struct Descriptor { + uint64_t Address; + uint64_t Length; + uint64_t getEndAddress() const { return Address + Length; } + }; + +private: + typedef std::vector<Descriptor> DescriptorColl; + typedef DescriptorColl::iterator DescriptorIter; + typedef DescriptorColl::const_iterator DescriptorConstIter; + + uint32_t Offset; + Header Header; + DescriptorColl ArangeDescriptors; + +public: + DWARFDebugArangeSet() { clear(); } + void clear(); + void compact(); + bool extract(DataExtractor data, uint32_t *offset_ptr); + void dump(raw_ostream &OS) const; + + uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; } + uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; } + uint32_t findAddress(uint64_t address) const; + uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } + const struct Header &getHeader() const { return Header; } + const Descriptor *getDescriptor(uint32_t i) const { + if (i < ArangeDescriptors.size()) + return &ArangeDescriptors[i]; + return NULL; + } +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp new file mode 100644 index 0000000..576d37d --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp @@ -0,0 +1,223 @@ +//===-- DWARFDebugAranges.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugAranges.h" +#include "DWARFCompileUnit.h" +#include "DWARFContext.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +using namespace llvm; + +// Compare function DWARFDebugAranges::Range structures +static bool RangeLessThan(const DWARFDebugAranges::Range &range1, + const DWARFDebugAranges::Range &range2) { + return range1.LoPC < range2.LoPC; +} + +namespace { + class CountArangeDescriptors { + public: + CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {} + void operator()(const DWARFDebugArangeSet &set) { + Count += set.getNumDescriptors(); + } + uint32_t &Count; + }; + + class AddArangeDescriptors { + public: + AddArangeDescriptors(DWARFDebugAranges::RangeColl &ranges) + : RangeCollection(ranges) {} + void operator()(const DWARFDebugArangeSet& set) { + const DWARFDebugArangeSet::Descriptor* arange_desc_ptr; + DWARFDebugAranges::Range range; + range.Offset = set.getCompileUnitDIEOffset(); + + for (uint32_t i=0; (arange_desc_ptr = set.getDescriptor(i)) != NULL; ++i){ + range.LoPC = arange_desc_ptr->Address; + range.Length = arange_desc_ptr->Length; + + // Insert each item in increasing address order so binary searching + // can later be done! 
+ DWARFDebugAranges::RangeColl::iterator insert_pos = + std::lower_bound(RangeCollection.begin(), RangeCollection.end(), + range, RangeLessThan); + RangeCollection.insert(insert_pos, range); + } + } + DWARFDebugAranges::RangeColl& RangeCollection; + }; +} + +bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { + if (debug_aranges_data.isValidOffset(0)) { + uint32_t offset = 0; + + typedef std::vector<DWARFDebugArangeSet> SetCollection; + typedef SetCollection::const_iterator SetCollectionIter; + SetCollection sets; + + DWARFDebugArangeSet set; + Range range; + while (set.extract(debug_aranges_data, &offset)) + sets.push_back(set); + + uint32_t count = 0; + + std::for_each(sets.begin(), sets.end(), CountArangeDescriptors(count)); + + if (count > 0) { + Aranges.reserve(count); + AddArangeDescriptors range_adder(Aranges); + std::for_each(sets.begin(), sets.end(), range_adder); + } + } + return false; +} + +bool DWARFDebugAranges::generate(DWARFContext *ctx) { + clear(); + if (ctx) { + const uint32_t num_compile_units = ctx->getNumCompileUnits(); + for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) { + DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx); + if (cu) + cu->buildAddressRangeTable(this, true); + } + } + return !isEmpty(); +} + +void DWARFDebugAranges::dump(raw_ostream &OS) const { + const uint32_t num_ranges = getNumRanges(); + for (uint32_t i = 0; i < num_ranges; ++i) { + const Range &range = Aranges[i]; + OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset, + (uint64_t)range.LoPC, (uint64_t)range.HiPC()); + } +} + +void DWARFDebugAranges::Range::dump(raw_ostream &OS) const { + OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC()); +} + +void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc, + uint64_t high_pc) { + if (!Aranges.empty()) { + if (Aranges.back().Offset == offset && Aranges.back().HiPC() == low_pc) { + Aranges.back().setHiPC(high_pc); + return; + } + } + Aranges.push_back(Range(low_pc, high_pc, offset)); +} + +void DWARFDebugAranges::sort(bool minimize, uint32_t n) { + const size_t orig_arange_size = Aranges.size(); + // Size of one? If so, no sorting is needed + if (orig_arange_size <= 1) + return; + // Sort our address range entries + std::stable_sort(Aranges.begin(), Aranges.end(), RangeLessThan); + + if (!minimize) + return; + + // Most address ranges are contiguous from function to function + // so our new ranges will likely be smaller. We calculate the size + // of the new ranges since although std::vector objects can be resized, + // the will never reduce their allocated block size and free any excesss + // memory, so we might as well start a brand new collection so it is as + // small as possible. + + // First calculate the size of the new minimal arange vector + // so we don't have to do a bunch of re-allocations as we + // copy the new minimal stuff over to the new collection. + size_t minimal_size = 1; + for (size_t i = 1; i < orig_arange_size; ++i) { + if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i], n)) + ++minimal_size; + } + + // If the sizes are the same, then no consecutive aranges can be + // combined, we are done. + if (minimal_size == orig_arange_size) + return; + + // Else, make a new RangeColl that _only_ contains what we need. 
+ RangeColl minimal_aranges; + minimal_aranges.resize(minimal_size); + uint32_t j = 0; + minimal_aranges[j] = Aranges[0]; + for (size_t i = 1; i < orig_arange_size; ++i) { + if(Range::SortedOverlapCheck (minimal_aranges[j], Aranges[i], n)) { + minimal_aranges[j].setHiPC (Aranges[i].HiPC()); + } else { + // Only increment j if we aren't merging + minimal_aranges[++j] = Aranges[i]; + } + } + assert (j+1 == minimal_size); + + // Now swap our new minimal aranges into place. The local + // minimal_aranges will then contian the old big collection + // which will get freed. + minimal_aranges.swap(Aranges); +} + +uint32_t DWARFDebugAranges::findAddress(uint64_t address) const { + if (!Aranges.empty()) { + Range range(address); + RangeCollIterator begin = Aranges.begin(); + RangeCollIterator end = Aranges.end(); + RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan); + + if (pos != end && pos->LoPC <= address && address < pos->HiPC()) { + return pos->Offset; + } else if (pos != begin) { + --pos; + if (pos->LoPC <= address && address < pos->HiPC()) + return (*pos).Offset; + } + } + return -1U; +} + +bool +DWARFDebugAranges::allRangesAreContiguous(uint64_t &LoPC, uint64_t &HiPC) const{ + if (Aranges.empty()) + return false; + + uint64_t next_addr = 0; + RangeCollIterator begin = Aranges.begin(); + for (RangeCollIterator pos = begin, end = Aranges.end(); pos != end; + ++pos) { + if (pos != begin && pos->LoPC != next_addr) + return false; + next_addr = pos->HiPC(); + } + // We checked for empty at the start of function so front() will be valid. + LoPC = Aranges.front().LoPC; + // We checked for empty at the start of function so back() will be valid. + HiPC = Aranges.back().HiPC(); + return true; +} + +bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const { + if (Aranges.empty()) + return false; + // We checked for empty at the start of function so front() will be valid. + LoPC = Aranges.front().LoPC; + // We checked for empty at the start of function so back() will be valid. + HiPC = Aranges.back().HiPC(); + return true; +} + diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h new file mode 100644 index 0000000..12afb60 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h @@ -0,0 +1,98 @@ +//===-- DWARFDebugAranges.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
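A hypothetical use of the lookup table whose interface follows in DWARFDebugAranges.h: populate it with per-unit ranges, sort it, then map a PC back to the owning compile unit's .debug_info offset (offsets and addresses below are made up for illustration):

#include "DWARFDebugAranges.h"   // the header introduced below

uint32_t cuOffsetForPC(uint64_t pc) {
  llvm::DWARFDebugAranges aranges;
  aranges.appendRange(/*cu_offset=*/0x00, 0x1000, 0x1080);
  aranges.appendRange(/*cu_offset=*/0x00, 0x1080, 0x1100); // contiguous: merged
  aranges.appendRange(/*cu_offset=*/0x9c, 0x2000, 0x2040);
  aranges.sort(/*minimize=*/true, /*n=*/0);
  return aranges.findAddress(pc);  // 0x00 for pc in [0x1000,0x1100), -1U if unmapped
}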
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H +#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H + +#include "DWARFDebugArangeSet.h" +#include <list> + +namespace llvm { + +class DWARFContext; + +class DWARFDebugAranges { +public: + struct Range { + explicit Range(uint64_t lo = -1ULL, uint64_t hi = -1ULL, + uint32_t off = -1U) + : LoPC(lo), Length(hi-lo), Offset(off) {} + + void clear() { + LoPC = -1ULL; + Length = 0; + Offset = -1U; + } + + void setHiPC(uint64_t HiPC) { + if (HiPC == -1ULL || HiPC <= LoPC) + Length = 0; + else + Length = HiPC - LoPC; + } + uint64_t HiPC() const { + if (Length) + return LoPC + Length; + return -1ULL; + } + bool isValidRange() const { return Length > 0; } + + static bool SortedOverlapCheck(const Range &curr_range, + const Range &next_range, uint32_t n) { + if (curr_range.Offset != next_range.Offset) + return false; + return curr_range.HiPC() + n >= next_range.LoPC; + } + + bool contains(const Range &range) const { + return LoPC <= range.LoPC && range.HiPC() <= HiPC(); + } + + void dump(raw_ostream &OS) const; + uint64_t LoPC; // Start of address range + uint32_t Length; // End of address range (not including this address) + uint32_t Offset; // Offset of the compile unit or die + }; + + void clear() { Aranges.clear(); } + bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const; + bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const; + bool extract(DataExtractor debug_aranges_data); + bool generate(DWARFContext *ctx); + + // Use append range multiple times and then call sort + void appendRange(uint32_t cu_offset, uint64_t low_pc, uint64_t high_pc); + void sort(bool minimize, uint32_t n); + + const Range *rangeAtIndex(uint32_t idx) const { + if (idx < Aranges.size()) + return &Aranges[idx]; + return NULL; + } + void dump(raw_ostream &OS) const; + uint32_t findAddress(uint64_t address) const; + bool isEmpty() const { return Aranges.empty(); } + uint32_t getNumRanges() const { return Aranges.size(); } + + uint32_t offsetAtIndex(uint32_t idx) const { + if (idx < Aranges.size()) + return Aranges[idx].Offset; + return -1U; + } + + typedef std::vector<Range> RangeColl; + typedef RangeColl::const_iterator RangeCollIterator; + +private: + RangeColl Aranges; +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp new file mode 100644 index 0000000..1b089ad --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -0,0 +1,444 @@ +//===-- DWARFDebugInfoEntry.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugInfoEntry.h" +#include "DWARFCompileUnit.h" +#include "DWARFContext.h" +#include "DWARFDebugAbbrev.h" +#include "DWARFFormValue.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace dwarf; + +void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, + const DWARFCompileUnit *cu, + unsigned recurseDepth, + unsigned indent) const { + DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + uint32_t offset = Offset; + + if (debug_info_data.isValidOffset(offset)) { + uint64_t abbrCode = debug_info_data.getULEB128(&offset); + + OS << format("\n0x%8.8x: ", Offset); + if (abbrCode) { + if (AbbrevDecl) { + const char *tagString = TagString(getTag()); + if (tagString) + OS.indent(indent) << tagString; + else + OS.indent(indent) << format("DW_TAG_Unknown_%x", getTag()); + OS << format(" [%u] %c\n", abbrCode, + AbbrevDecl->hasChildren() ? '*' : ' '); + + // Dump all data in the .debug_info for the attributes + const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); + for (uint32_t i = 0; i != numAttributes; ++i) { + uint16_t attr = AbbrevDecl->getAttrByIndex(i); + uint16_t form = AbbrevDecl->getFormByIndex(i); + dumpAttribute(OS, cu, &offset, attr, form, indent); + } + + const DWARFDebugInfoEntryMinimal *child = getFirstChild(); + if (recurseDepth > 0 && child) { + while (child) { + child->dump(OS, cu, recurseDepth-1, indent+2); + child = child->getSibling(); + } + } + } else { + OS << "Abbreviation code not found in 'debug_abbrev' class for code: " + << abbrCode << '\n'; + } + } else { + OS.indent(indent) << "NULL\n"; + } + } +} + +void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, + const DWARFCompileUnit *cu, + uint32_t* offset_ptr, + uint16_t attr, + uint16_t form, + unsigned indent) const { + OS << format("0x%8.8x: ", *offset_ptr); + OS.indent(indent+2); + const char *attrString = AttributeString(attr); + if (attrString) + OS << attrString; + else + OS << format("DW_AT_Unknown_%x", attr); + const char *formString = FormEncodingString(form); + if (formString) + OS << " [" << formString << ']'; + else + OS << format(" [DW_FORM_Unknown_%x]", form); + + DWARFFormValue formValue(form); + + if (!formValue.extractValue(cu->getDebugInfoExtractor(), offset_ptr, cu)) + return; + + OS << "\t("; + formValue.dump(OS, cu); + OS << ")\n"; +} + +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, + const uint8_t *fixed_form_sizes, + uint32_t *offset_ptr) { + Offset = *offset_ptr; + + DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr); + + assert (fixed_form_sizes); // For best performance this should be specified! 
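The table consulted below lets forms with a fixed on-disk size (DW_FORM_data4, DW_FORM_ref8, and so on) be skipped with a single addition, while the switch still walks the variable-length encodings: the block forms carry an explicit length prefix, DW_FORM_string is scanned to its terminating NUL, and the LEB128 forms are decoded only to find where they end.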
+ + if (abbrCode) { + uint32_t offset = *offset_ptr; + + AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); + + // Skip all data in the .debug_info for the attributes + const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); + uint32_t i; + uint16_t form; + for (i=0; i<numAttributes; ++i) { + form = AbbrevDecl->getFormByIndex(i); + + const uint8_t fixed_skip_size = fixed_form_sizes[form]; + if (fixed_skip_size) + offset += fixed_skip_size; + else { + bool form_is_indirect = false; + do { + form_is_indirect = false; + uint32_t form_size = 0; + switch (form) { + // Blocks if inlined data that have a length field and the data bytes + // inlined in the .debug_info. + case DW_FORM_block: + form_size = debug_info_data.getULEB128(&offset); + break; + case DW_FORM_block1: + form_size = debug_info_data.getU8(&offset); + break; + case DW_FORM_block2: + form_size = debug_info_data.getU16(&offset); + break; + case DW_FORM_block4: + form_size = debug_info_data.getU32(&offset); + break; + + // Inlined NULL terminated C-strings + case DW_FORM_string: + debug_info_data.getCStr(&offset); + break; + + // Compile unit address sized values + case DW_FORM_addr: + case DW_FORM_ref_addr: + form_size = cu->getAddressByteSize(); + break; + + // 1 byte values + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + form_size = 1; + break; + + // 2 byte values + case DW_FORM_data2: + case DW_FORM_ref2: + form_size = 2; + break; + + // 4 byte values + case DW_FORM_strp: + case DW_FORM_data4: + case DW_FORM_ref4: + form_size = 4; + break; + + // 8 byte values + case DW_FORM_data8: + case DW_FORM_ref8: + form_size = 8; + break; + + // signed or unsigned LEB 128 values + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + debug_info_data.getULEB128(&offset); + break; + + case DW_FORM_indirect: + form_is_indirect = true; + form = debug_info_data.getULEB128(&offset); + break; + + default: + *offset_ptr = Offset; + return false; + } + offset += form_size; + + } while (form_is_indirect); + } + } + *offset_ptr = offset; + return true; + } else { + AbbrevDecl = NULL; + return true; // NULL debug tag entry + } + + return false; +} + +bool +DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, + uint32_t *offset_ptr) { + DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + const uint32_t cu_end_offset = cu->getNextCompileUnitOffset(); + const uint8_t cu_addr_size = cu->getAddressByteSize(); + uint32_t offset = *offset_ptr; + if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) { + Offset = offset; + + uint64_t abbrCode = debug_info_data.getULEB128(&offset); + + if (abbrCode) { + AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); + + if (AbbrevDecl) { + uint16_t tag = AbbrevDecl->getTag(); + + bool isCompileUnitTag = tag == DW_TAG_compile_unit; + if(cu && isCompileUnitTag) + const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0); + + // Skip all data in the .debug_info for the attributes + const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); + for (uint32_t i = 0; i != numAttributes; ++i) { + uint16_t attr = AbbrevDecl->getAttrByIndex(i); + uint16_t form = AbbrevDecl->getFormByIndex(i); + + if (isCompileUnitTag && + ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) { + DWARFFormValue form_value(form); + if (form_value.extractValue(debug_info_data, &offset, cu)) { + if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc) + const_cast<DWARFCompileUnit*>(cu) + ->setBaseAddress(form_value.getUnsigned()); + 
} + } else { + bool form_is_indirect = false; + do { + form_is_indirect = false; + register uint32_t form_size = 0; + switch (form) { + // Blocks if inlined data that have a length field and the data + // bytes // inlined in the .debug_info + case DW_FORM_block: + form_size = debug_info_data.getULEB128(&offset); + break; + case DW_FORM_block1: + form_size = debug_info_data.getU8(&offset); + break; + case DW_FORM_block2: + form_size = debug_info_data.getU16(&offset); + break; + case DW_FORM_block4: + form_size = debug_info_data.getU32(&offset); + break; + + // Inlined NULL terminated C-strings + case DW_FORM_string: + debug_info_data.getCStr(&offset); + break; + + // Compile unit address sized values + case DW_FORM_addr: + case DW_FORM_ref_addr: + form_size = cu_addr_size; + break; + + // 1 byte values + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + form_size = 1; + break; + + // 2 byte values + case DW_FORM_data2: + case DW_FORM_ref2: + form_size = 2; + break; + + // 4 byte values + case DW_FORM_strp: + form_size = 4; + break; + + case DW_FORM_data4: + case DW_FORM_ref4: + form_size = 4; + break; + + // 8 byte values + case DW_FORM_data8: + case DW_FORM_ref8: + form_size = 8; + break; + + // signed or unsigned LEB 128 values + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + debug_info_data.getULEB128(&offset); + break; + + case DW_FORM_indirect: + form = debug_info_data.getULEB128(&offset); + form_is_indirect = true; + break; + + default: + *offset_ptr = offset; + return false; + } + + offset += form_size; + } while (form_is_indirect); + } + } + *offset_ptr = offset; + return true; + } + } else { + AbbrevDecl = NULL; + *offset_ptr = offset; + return true; // NULL debug tag entry + } + } + + return false; +} + +uint32_t +DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, + const uint16_t attr, + DWARFFormValue &form_value, + uint32_t *end_attr_offset_ptr) + const { + if (AbbrevDecl) { + uint32_t attr_idx = AbbrevDecl->findAttributeIndex(attr); + + if (attr_idx != -1U) { + uint32_t offset = getOffset(); + + DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + + // Skip the abbreviation code so we are at the data for the attributes + debug_info_data.getULEB128(&offset); + + uint32_t idx = 0; + while (idx < attr_idx) + DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(idx++), + debug_info_data, &offset, cu); + + const uint32_t attr_offset = offset; + form_value = DWARFFormValue(AbbrevDecl->getFormByIndex(idx)); + if (form_value.extractValue(debug_info_data, &offset, cu)) { + if (end_attr_offset_ptr) + *end_attr_offset_ptr = offset; + return attr_offset; + } + } + } + + return 0; +} + +const char* +DWARFDebugInfoEntryMinimal::getAttributeValueAsString( + const DWARFCompileUnit* cu, + const uint16_t attr, + const char* fail_value) const { + DWARFFormValue form_value; + if (getAttributeValue(cu, attr, form_value)) { + DataExtractor stringExtractor(cu->getContext().getStringSection(), + false, 0); + return form_value.getAsCString(&stringExtractor); + } + return fail_value; +} + +uint64_t +DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( + const DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) const { + DWARFFormValue form_value; + if (getAttributeValue(cu, attr, form_value)) + return form_value.getUnsigned(); + return fail_value; +} + +int64_t +DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( + const DWARFCompileUnit* cu, + const uint16_t attr, + int64_t fail_value) const { + DWARFFormValue 
form_value; + if (getAttributeValue(cu, attr, form_value)) + return form_value.getSigned(); + return fail_value; +} + +uint64_t +DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( + const DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) const { + DWARFFormValue form_value; + if (getAttributeValue(cu, attr, form_value)) + return form_value.getReference(cu); + return fail_value; +} + +void +DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, + DWARFDebugAranges *debug_aranges) + const { + if (AbbrevDecl) { + uint16_t tag = AbbrevDecl->getTag(); + if (tag == DW_TAG_subprogram) { + uint64_t hi_pc = -1ULL; + uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); + if (lo_pc != -1ULL) + hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); + if (hi_pc != -1ULL) + debug_aranges->appendRange(cu->getOffset(), lo_pc, hi_pc); + } + + const DWARFDebugInfoEntryMinimal *child = getFirstChild(); + while (child) { + child->buildAddressRangeTable(cu, debug_aranges); + child = child->getSibling(); + } + } +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h new file mode 100644 index 0000000..aff2e85 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -0,0 +1,135 @@ +//===-- DWARFDebugInfoEntry.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H + +#include "DWARFAbbreviationDeclaration.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class DWARFDebugAranges; +class DWARFCompileUnit; +class DWARFContext; +class DWARFFormValue; + +/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. +class DWARFDebugInfoEntryMinimal { + /// Offset within the .debug_info of the start of this entry. + uint64_t Offset; + + /// How many to subtract from "this" to get the parent. + /// If zero this die has no parent. + uint32_t ParentIdx; + + /// How many to add to "this" to get the sibling. + uint32_t SiblingIdx; + + const DWARFAbbreviationDeclaration *AbbrevDecl; +public: + DWARFDebugInfoEntryMinimal() + : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {} + + void dump(raw_ostream &OS, const DWARFCompileUnit *cu, + unsigned recurseDepth, unsigned indent = 0) const; + void dumpAttribute(raw_ostream &OS, const DWARFCompileUnit *cu, + uint32_t *offset_ptr, uint16_t attr, uint16_t form, + unsigned indent = 0) const; + + bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes, + uint32_t *offset_ptr); + + /// Extract a debug info entry for a given compile unit from the + /// .debug_info and .debug_abbrev data starting at the given offset. + bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr); + + uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } + bool isNULL() const { return AbbrevDecl == 0; } + uint64_t getOffset() const { return Offset; } + uint32_t getNumAttributes() const { + return !isNULL() ? AbbrevDecl->getNumAttributes() : 0; + } + bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); } + + // We know we are kept in a vector of contiguous entries, so we know + // our parent will be some index behind "this". 
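// Illustrative example (indices assumed, not taken from this patch): if this
// entry lives at index 5 of the compile unit's contiguous DIE vector, then
// ParentIdx == 3 makes getParent() return the entry at index 2, SiblingIdx == 4
// makes getSibling() return the entry at index 9, and getFirstChild() is simply
// the entry at index 6 whenever hasChildren() is true.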
+ DWARFDebugInfoEntryMinimal *getParent() { + return ParentIdx > 0 ? this - ParentIdx : 0; + } + const DWARFDebugInfoEntryMinimal *getParent() const { + return ParentIdx > 0 ? this - ParentIdx : 0; + } + // We know we are kept in a vector of contiguous entries, so we know + // our sibling will be some index after "this". + DWARFDebugInfoEntryMinimal *getSibling() { + return SiblingIdx > 0 ? this + SiblingIdx : 0; + } + const DWARFDebugInfoEntryMinimal *getSibling() const { + return SiblingIdx > 0 ? this + SiblingIdx : 0; + } + // We know we are kept in a vector of contiguous entries, so we know + // we don't need to store our child pointer, if we have a child it will + // be the next entry in the list... + DWARFDebugInfoEntryMinimal *getFirstChild() { + return hasChildren() ? this + 1 : 0; + } + const DWARFDebugInfoEntryMinimal *getFirstChild() const { + return hasChildren() ? this + 1 : 0; + } + + void setParent(DWARFDebugInfoEntryMinimal *parent) { + if (parent) { + // We know we are kept in a vector of contiguous entries, so we know + // our parent will be some index behind "this". + ParentIdx = this - parent; + } else + ParentIdx = 0; + } + void setSibling(DWARFDebugInfoEntryMinimal *sibling) { + if (sibling) { + // We know we are kept in a vector of contiguous entries, so we know + // our sibling will be some index after "this". + SiblingIdx = sibling - this; + sibling->setParent(getParent()); + } else + SiblingIdx = 0; + } + + const DWARFAbbreviationDeclaration *getAbbreviationDeclarationPtr() const { + return AbbrevDecl; + } + + uint32_t getAttributeValue(const DWARFCompileUnit *cu, + const uint16_t attr, DWARFFormValue &formValue, + uint32_t *end_attr_offset_ptr = 0) const; + + const char* getAttributeValueAsString(const DWARFCompileUnit* cu, + const uint16_t attr, + const char *fail_value) const; + + uint64_t getAttributeValueAsUnsigned(const DWARFCompileUnit *cu, + const uint16_t attr, + uint64_t fail_value) const; + + uint64_t getAttributeValueAsReference(const DWARFCompileUnit *cu, + const uint16_t attr, + uint64_t fail_value) const; + + int64_t getAttributeValueAsSigned(const DWARFCompileUnit* cu, + const uint16_t attr, + int64_t fail_value) const; + + void buildAddressRangeTable(const DWARFCompileUnit *cu, + DWARFDebugAranges *debug_aranges) const; +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp new file mode 100644 index 0000000..fe1ef78 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp @@ -0,0 +1,475 @@ +//===-- DWARFDebugLine.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugLine.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; +using namespace dwarf; + +void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { + OS << "Line table prologue:\n" + << format(" total_length: 0x%8.8x\n", TotalLength) + << format(" version: %u\n", Version) + << format("prologue_length: 0x%8.8x\n", PrologueLength) + << format("min_inst_length: %u\n", MinInstLength) + << format("default_is_stmt: %u\n", DefaultIsStmt) + << format(" line_base: %i\n", LineBase) + << format(" line_range: %u\n", LineRange) + << format(" opcode_base: %u\n", OpcodeBase); + + for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i) + OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(i+1), + StandardOpcodeLengths[i]); + + if (!IncludeDirectories.empty()) + for (uint32_t i = 0; i < IncludeDirectories.size(); ++i) + OS << format("include_directories[%3u] = '", i+1) + << IncludeDirectories[i] << "'\n"; + + if (!FileNames.empty()) { + OS << " Dir Mod Time File Len File Name\n" + << " ---- ---------- ---------- -----------" + "----------------\n"; + for (uint32_t i = 0; i < FileNames.size(); ++i) { + const FileNameEntry& fileEntry = FileNames[i]; + OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx) + << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length) + << fileEntry.Name << '\n'; + } + } +} + +void DWARFDebugLine::Row::postAppend() { + BasicBlock = false; + PrologueEnd = false; + EpilogueBegin = false; +} + +void DWARFDebugLine::Row::reset(bool default_is_stmt) { + Address = 0; + Line = 1; + Column = 0; + File = 1; + Isa = 0; + IsStmt = default_is_stmt; + BasicBlock = false; + EndSequence = false; + PrologueEnd = false; + EpilogueBegin = false; +} + +void DWARFDebugLine::Row::dump(raw_ostream &OS) const { + OS << format("0x%16.16llx %6u %6u", Address, Line, Column) + << format(" %6u %3u ", File, Isa) + << (IsStmt ? " is_stmt" : "") + << (BasicBlock ? " basic_block" : "") + << (PrologueEnd ? " prologue_end" : "") + << (EpilogueBegin ? " epilogue_begin" : "") + << (EndSequence ? " end_sequence" : "") + << '\n'; +} + +void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { + Prologue.dump(OS); + OS << '\n'; + + if (!Rows.empty()) { + OS << "Address Line Column File ISA Flags\n" + << "------------------ ------ ------ ------ --- -------------\n"; + for (std::vector<Row>::const_iterator pos = Rows.begin(), + end = Rows.end(); pos != end; ++pos) + pos->dump(OS); + } +} + +DWARFDebugLine::State::~State() {} + +void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { + ++row; // Increase the row number. + LineTable::appendRow(*this); + Row::postAppend(); +} + +DWARFDebugLine::DumpingState::~DumpingState() {} + +void DWARFDebugLine::DumpingState::finalize(uint32_t offset) { + LineTable::dump(OS); +} + +const DWARFDebugLine::LineTable * +DWARFDebugLine::getLineTable(uint32_t offset) const { + LineTableConstIter pos = LineTableMap.find(offset); + if (pos != LineTableMap.end()) + return &pos->second; + return 0; +} + +const DWARFDebugLine::LineTable * +DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, + uint32_t offset) { + std::pair<LineTableIter, bool> pos = + LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable())); + if (pos.second) { + // Parse and cache the line table for at this offset. 
+ State state; + if (!parseStatementTable(debug_line_data, &offset, state)) + return 0; + pos.first->second = state; + } + return &pos.first->second; +} + +bool +DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, + uint32_t *offset_ptr, Prologue *prologue) { + const uint32_t prologue_offset = *offset_ptr; + + prologue->clear(); + prologue->TotalLength = debug_line_data.getU32(offset_ptr); + prologue->Version = debug_line_data.getU16(offset_ptr); + if (prologue->Version != 2) + return false; + + prologue->PrologueLength = debug_line_data.getU32(offset_ptr); + const uint32_t end_prologue_offset = prologue->PrologueLength + *offset_ptr; + prologue->MinInstLength = debug_line_data.getU8(offset_ptr); + prologue->DefaultIsStmt = debug_line_data.getU8(offset_ptr); + prologue->LineBase = debug_line_data.getU8(offset_ptr); + prologue->LineRange = debug_line_data.getU8(offset_ptr); + prologue->OpcodeBase = debug_line_data.getU8(offset_ptr); + + prologue->StandardOpcodeLengths.reserve(prologue->OpcodeBase-1); + for (uint32_t i = 1; i < prologue->OpcodeBase; ++i) { + uint8_t op_len = debug_line_data.getU8(offset_ptr); + prologue->StandardOpcodeLengths.push_back(op_len); + } + + while (*offset_ptr < end_prologue_offset) { + const char *s = debug_line_data.getCStr(offset_ptr); + if (s && s[0]) + prologue->IncludeDirectories.push_back(s); + else + break; + } + + while (*offset_ptr < end_prologue_offset) { + const char *name = debug_line_data.getCStr(offset_ptr); + if (name && name[0]) { + FileNameEntry fileEntry; + fileEntry.Name = name; + fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); + fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); + fileEntry.Length = debug_line_data.getULEB128(offset_ptr); + prologue->FileNames.push_back(fileEntry); + } else { + break; + } + } + + if (*offset_ptr != end_prologue_offset) { + fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" + " have ended at 0x%8.8x but it ended at 0x%8.8x\n", + prologue_offset, end_prologue_offset, *offset_ptr); + } + return end_prologue_offset; +} + +bool +DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, + uint32_t *offset_ptr, State &state) { + const uint32_t debug_line_offset = *offset_ptr; + + Prologue *prologue = &state.Prologue; + + if (!parsePrologue(debug_line_data, offset_ptr, prologue)) { + // Restore our offset and return false to indicate failure! + *offset_ptr = debug_line_offset; + return false; + } + + const uint32_t end_offset = debug_line_offset + prologue->TotalLength + + sizeof(prologue->TotalLength); + + state.reset(); + + while (*offset_ptr < end_offset) { + uint8_t opcode = debug_line_data.getU8(offset_ptr); + + if (opcode == 0) { + // Extended Opcodes always start with a zero opcode followed by + // a uleb128 length so you can skip ones you don't know about + uint32_t ext_offset = *offset_ptr; + uint64_t len = debug_line_data.getULEB128(offset_ptr); + uint32_t arg_size = len - (*offset_ptr - ext_offset); + + uint8_t sub_opcode = debug_line_data.getU8(offset_ptr); + switch (sub_opcode) { + case DW_LNE_end_sequence: + // Set the end_sequence register of the state machine to true and + // append a row to the matrix using the current values of the + // state-machine registers. Then reset the registers to the initial + // values specified above. Every statement program sequence must end + // with a DW_LNE_end_sequence instruction which creates a row whose + // address is that of the byte after the last target machine instruction + // of the sequence.
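// Byte-level illustration (encoding assumed from the rule above, not taken
// from this patch): an extended opcode is the escape byte 0x00, a ULEB128
// length covering the sub-opcode and its arguments, then the sub-opcode
// itself, so DW_LNE_end_sequence with no arguments is the three bytes
// 00 01 01.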
+ state.EndSequence = true; + state.appendRowToMatrix(*offset_ptr); + state.reset(); + break; + + case DW_LNE_set_address: + // Takes a single relocatable address as an operand. The size of the + // operand is the size appropriate to hold an address on the target + // machine. Set the address register to the value given by the + // relocatable address. All of the other statement program opcodes + // that affect the address register add a delta to it. This instruction + // stores a relocatable value into it instead. + state.Address = debug_line_data.getAddress(offset_ptr); + break; + + case DW_LNE_define_file: + // Takes 4 arguments. The first is a null terminated string containing + // a source file name. The second is an unsigned LEB128 number + // representing the directory index of the directory in which the file + // was found. The third is an unsigned LEB128 number representing the + // time of last modification of the file. The fourth is an unsigned + // LEB128 number representing the length in bytes of the file. The time + // and length fields may contain LEB128(0) if the information is not + // available. + // + // The directory index represents an entry in the include_directories + // section of the statement program prologue. The index is LEB128(0) + // if the file was found in the current directory of the compilation, + // LEB128(1) if it was found in the first directory in the + // include_directories section, and so on. The directory index is + // ignored for file names that represent full path names. + // + // The files are numbered, starting at 1, in the order in which they + // appear; the names in the prologue come before names defined by + // the DW_LNE_define_file instruction. These numbers are used in the + // the file register of the state machine. + { + FileNameEntry fileEntry; + fileEntry.Name = debug_line_data.getCStr(offset_ptr); + fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); + fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); + fileEntry.Length = debug_line_data.getULEB128(offset_ptr); + prologue->FileNames.push_back(fileEntry); + } + break; + + default: + // Length doesn't include the zero opcode byte or the length itself, but + // it does include the sub_opcode, so we have to adjust for that below + (*offset_ptr) += arg_size; + break; + } + } else if (opcode < prologue->OpcodeBase) { + switch (opcode) { + // Standard Opcodes + case DW_LNS_copy: + // Takes no arguments. Append a row to the matrix using the + // current values of the state-machine registers. Then set + // the basic_block register to false. + state.appendRowToMatrix(*offset_ptr); + break; + + case DW_LNS_advance_pc: + // Takes a single unsigned LEB128 operand, multiplies it by the + // min_inst_length field of the prologue, and adds the + // result to the address register of the state machine. + state.Address += debug_line_data.getULEB128(offset_ptr) * + prologue->MinInstLength; + break; + + case DW_LNS_advance_line: + // Takes a single signed LEB128 operand and adds that value to + // the line register of the state machine. + state.Line += debug_line_data.getSLEB128(offset_ptr); + break; + + case DW_LNS_set_file: + // Takes a single unsigned LEB128 operand and stores it in the file + // register of the state machine. + state.File = debug_line_data.getULEB128(offset_ptr); + break; + + case DW_LNS_set_column: + // Takes a single unsigned LEB128 operand and stores it in the + // column register of the state machine. 
+ state.Column = debug_line_data.getULEB128(offset_ptr); + break; + + case DW_LNS_negate_stmt: + // Takes no arguments. Set the is_stmt register of the state + // machine to the logical negation of its current value. + state.IsStmt = !state.IsStmt; + break; + + case DW_LNS_set_basic_block: + // Takes no arguments. Set the basic_block register of the + // state machine to true + state.BasicBlock = true; + break; + + case DW_LNS_const_add_pc: + // Takes no arguments. Add to the address register of the state + // machine the address increment value corresponding to special + // opcode 255. The motivation for DW_LNS_const_add_pc is this: + // when the statement program needs to advance the address by a + // small amount, it can use a single special opcode, which occupies + // a single byte. When it needs to advance the address by up to + // twice the range of the last special opcode, it can use + // DW_LNS_const_add_pc followed by a special opcode, for a total + // of two bytes. Only if it needs to advance the address by more + // than twice that range will it need to use both DW_LNS_advance_pc + // and a special opcode, requiring three or more bytes. + { + uint8_t adjust_opcode = 255 - prologue->OpcodeBase; + uint64_t addr_offset = (adjust_opcode / prologue->LineRange) * + prologue->MinInstLength; + state.Address += addr_offset; + } + break; + + case DW_LNS_fixed_advance_pc: + // Takes a single uhalf operand. Add to the address register of + // the state machine the value of the (unencoded) operand. This + // is the only standard opcode that takes an argument that is not + // a variable length number. The motivation for DW_LNS_fixed_advance_pc + // is this: existing assemblers cannot emit DW_LNS_advance_pc or + // special opcodes because they cannot encode LEB128 numbers or + // judge when the computation of a special opcode overflows and + // requires the use of DW_LNS_advance_pc. Such assemblers, however, + // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. + state.Address += debug_line_data.getU16(offset_ptr); + break; + + case DW_LNS_set_prologue_end: + // Takes no arguments. Set the prologue_end register of the + // state machine to true + state.PrologueEnd = true; + break; + + case DW_LNS_set_epilogue_begin: + // Takes no arguments. Set the epilogue_begin register of the + // state machine to true + state.EpilogueBegin = true; + break; + + case DW_LNS_set_isa: + // Takes a single unsigned LEB128 operand and stores it in the + // isa register of the state machine. + state.Isa = debug_line_data.getULEB128(offset_ptr); + break; + + default: + // Handle any unknown standard opcodes here. We know the lengths + // of such opcodes because they are specified in the prologue + // as a multiple of LEB128 operands for each opcode. + { + assert(opcode - 1U < prologue->StandardOpcodeLengths.size()); + uint8_t opcode_length = prologue->StandardOpcodeLengths[opcode - 1]; + for (uint8_t i=0; i<opcode_length; ++i) + debug_line_data.getULEB128(offset_ptr); + } + break; + } + } else { + // Special Opcodes + + // A special opcode value is chosen based on the amount that needs + // to be added to the line and address registers. The maximum line + // increment for a special opcode is the value of the line_base + // field in the header, plus the value of the line_range field, + // minus 1 (line base + line range - 1). If the desired line + // increment is greater than the maximum line increment, a standard + // opcode must be used instead of a special opcode.
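// Worked example (header values assumed for illustration, using the decode
// rules spelled out below): with opcode_base = 13, line_base = -5,
// line_range = 14 and min_inst_length = 1, the maximum line increment for a
// special opcode is -5 + 14 - 1 = 8. Special opcode 0x4b has adjusted opcode
// 75 - 13 = 62, so the address advances by (62 / 14) * 1 = 4 and the line by
// -5 + (62 % 14) = 1 before a row is appended.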
The "address + // advance" is calculated by dividing the desired address increment + // by the minimum_instruction_length field from the header. The + // special opcode is then calculated using the following formula: + // + // opcode = (desired line increment - line_base) + + // (line_range * address advance) + opcode_base + // + // If the resulting opcode is greater than 255, a standard opcode + // must be used instead. + // + // To decode a special opcode, subtract the opcode_base from the + // opcode itself to give the adjusted opcode. The amount to + // increment the address register is the result of the adjusted + // opcode divided by the line_range multiplied by the + // minimum_instruction_length field from the header. That is: + // + // address increment = (adjusted opcode / line_range) * + // minimum_instruction_length + // + // The amount to increment the line register is the line_base plus + // the result of the adjusted opcode modulo the line_range. That is: + // + // line increment = line_base + (adjusted opcode % line_range) + + uint8_t adjust_opcode = opcode - prologue->OpcodeBase; + uint64_t addr_offset = (adjust_opcode / prologue->LineRange) * + prologue->MinInstLength; + int32_t line_offset = prologue->LineBase + + (adjust_opcode % prologue->LineRange); + state.Line += line_offset; + state.Address += addr_offset; + state.appendRowToMatrix(*offset_ptr); + } + } + + state.finalize(*offset_ptr); + + return end_offset; +} + +static bool findMatchingAddress(const DWARFDebugLine::Row& row1, + const DWARFDebugLine::Row& row2) { + return row1.Address < row2.Address; +} + +uint32_t +DWARFDebugLine::LineTable::lookupAddress(uint64_t address, + uint64_t cu_high_pc) const { + uint32_t index = UINT32_MAX; + if (!Rows.empty()) { + // Use the lower_bound algorithm to perform a binary search since we know + // that our line table data is ordered by address. + DWARFDebugLine::Row row; + row.Address = address; + typedef std::vector<Row>::const_iterator iterator; + iterator begin_pos = Rows.begin(); + iterator end_pos = Rows.end(); + iterator pos = std::lower_bound(begin_pos, end_pos, row, + findMatchingAddress); + if (pos == end_pos) { + if (address < cu_high_pc) + return Rows.size()-1; + } else { + // Rely on fact that we are using a std::vector and we can do + // pointer arithmetic to find the row index (which will be one less + // that what we found since it will find the first position after + // the current address) since std::vector iterators are just + // pointers to the container type. + index = pos - begin_pos; + if (pos->Address > address) { + if (index > 0) + --index; + else + index = UINT32_MAX; + } + } + } + return index; // Failed to find address. +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h new file mode 100644 index 0000000..bc6a70b --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h @@ -0,0 +1,190 @@ +//===-- DWARFDebugLine.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H +#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H + +#include "llvm/Support/DataExtractor.h" +#include <map> +#include <string> +#include <vector> + +namespace llvm { + +class raw_ostream; + +class DWARFDebugLine { +public: + struct FileNameEntry { + FileNameEntry() : DirIdx(0), ModTime(0), Length(0) {} + + std::string Name; + uint64_t DirIdx; + uint64_t ModTime; + uint64_t Length; + }; + + struct Prologue { + Prologue() + : TotalLength(0), Version(0), PrologueLength(0), MinInstLength(0), + DefaultIsStmt(0), LineBase(0), LineRange(0), OpcodeBase(0) {} + + // The size in bytes of the statement information for this compilation unit + // (not including the total_length field itself). + uint32_t TotalLength; + // Version identifier for the statement information format. + uint16_t Version; + // The number of bytes following the prologue_length field to the beginning + // of the first byte of the statement program itself. + uint32_t PrologueLength; + // The size in bytes of the smallest target machine instruction. Statement + // program opcodes that alter the address register first multiply their + // operands by this value. + uint8_t MinInstLength; + // The initial value of the is_stmt register. + uint8_t DefaultIsStmt; + // This parameter affects the meaning of the special opcodes. See below. + int8_t LineBase; + // This parameter affects the meaning of the special opcodes. See below. + uint8_t LineRange; + // The number assigned to the first special opcode. + uint8_t OpcodeBase; + std::vector<uint8_t> StandardOpcodeLengths; + std::vector<std::string> IncludeDirectories; + std::vector<FileNameEntry> FileNames; + + // Length of the prologue in bytes. + uint32_t getLength() const { + return PrologueLength + sizeof(TotalLength) + sizeof(Version) + + sizeof(PrologueLength); + } + // Length of the line table data in bytes (not including the prologue). + uint32_t getStatementTableLength() const { + return TotalLength + sizeof(TotalLength) - getLength(); + } + int32_t getMaxLineIncrementForSpecialOpcode() const { + return LineBase + (int8_t)LineRange - 1; + } + void dump(raw_ostream &OS) const; + void clear() { + TotalLength = Version = PrologueLength = 0; + MinInstLength = LineBase = LineRange = OpcodeBase = 0; + StandardOpcodeLengths.clear(); + IncludeDirectories.clear(); + FileNames.clear(); + } + }; + + // Standard .debug_line state machine structure. + struct Row { + Row(bool default_is_stmt = false) { reset(default_is_stmt); } + /// Called after a row is appended to the matrix. + void postAppend(); + void reset(bool default_is_stmt); + void dump(raw_ostream &OS) const; + + // The program-counter value corresponding to a machine instruction + // generated by the compiler. + uint64_t Address; + // An unsigned integer indicating a source line number. Lines are numbered + // beginning at 1. The compiler may emit the value 0 in cases where an + // instruction cannot be attributed to any source line. + uint32_t Line; + // An unsigned integer indicating a column number within a source line. + // Columns are numbered beginning at 1. The value 0 is reserved to indicate + // that a statement begins at the 'left edge' of the line. + uint16_t Column; + // An unsigned integer indicating the identity of the source file + // corresponding to a machine instruction.
+ uint16_t File; + // An unsigned integer whose value encodes the applicable instruction set + // architecture for the current instruction. + uint8_t Isa; + // A boolean indicating that the current instruction is the beginning of a + // statement. + uint8_t IsStmt:1, + // A boolean indicating that the current instruction is the + // beginning of a basic block. + BasicBlock:1, + // A boolean indicating that the current address is that of the + // first byte after the end of a sequence of target machine + // instructions. + EndSequence:1, + // A boolean indicating that the current address is one (of possibly + // many) where execution should be suspended for an entry breakpoint + // of a function. + PrologueEnd:1, + // A boolean indicating that the current address is one (of possibly + // many) where execution should be suspended for an exit breakpoint + // of a function. + EpilogueBegin:1; + }; + + struct LineTable { + void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } + void clear() { + Prologue.clear(); + Rows.clear(); + } + + uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const; + void dump(raw_ostream &OS) const; + + struct Prologue Prologue; + std::vector<Row> Rows; + }; + + struct State : public Row, public LineTable { + // Special row codes. + enum { + StartParsingLineTable = 0, + DoneParsingLineTable = -1 + }; + + State() : row(StartParsingLineTable) {} + virtual ~State(); + + virtual void appendRowToMatrix(uint32_t offset); + virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; } + virtual void reset() { Row::reset(Prologue.DefaultIsStmt); } + + // The row number that starts at zero for the prologue, and increases for + // each row added to the matrix. + unsigned row; + }; + + struct DumpingState : public State { + DumpingState(raw_ostream &OS) : OS(OS) {} + virtual ~DumpingState(); + virtual void finalize(uint32_t offset); + private: + raw_ostream &OS; + }; + + static bool parsePrologue(DataExtractor debug_line_data, uint32_t *offset_ptr, + Prologue *prologue); + /// Parse a single line table (prologue and all rows). + static bool parseStatementTable(DataExtractor debug_line_data, + uint32_t *offset_ptr, State &state); + + const LineTable *getLineTable(uint32_t offset) const; + const LineTable *getOrParseLineTable(DataExtractor debug_line_data, + uint32_t offset); + +private: + typedef std::map<uint32_t, LineTable> LineTableMapTy; + typedef LineTableMapTy::iterator LineTableIter; + typedef LineTableMapTy::const_iterator LineTableConstIter; + + LineTableMapTy LineTableMap; +}; + +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp new file mode 100644 index 0000000..705efe5 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp @@ -0,0 +1,427 @@ +//===-- DWARFFormValue.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFFormValue.h" +#include "DWARFCompileUnit.h" +#include "DWARFContext.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +using namespace llvm; +using namespace dwarf; + +static const uint8_t form_sizes_addr4[] = { + 0, // 0x00 unused + 4, // 0x01 DW_FORM_addr + 0, // 0x02 unused + 0, // 0x03 DW_FORM_block2 + 0, // 0x04 DW_FORM_block4 + 2, // 0x05 DW_FORM_data2 + 4, // 0x06 DW_FORM_data4 + 8, // 0x07 DW_FORM_data8 + 0, // 0x08 DW_FORM_string + 0, // 0x09 DW_FORM_block + 0, // 0x0a DW_FORM_block1 + 1, // 0x0b DW_FORM_data1 + 1, // 0x0c DW_FORM_flag + 0, // 0x0d DW_FORM_sdata + 4, // 0x0e DW_FORM_strp + 0, // 0x0f DW_FORM_udata + 4, // 0x10 DW_FORM_ref_addr + 1, // 0x11 DW_FORM_ref1 + 2, // 0x12 DW_FORM_ref2 + 4, // 0x13 DW_FORM_ref4 + 8, // 0x14 DW_FORM_ref8 + 0, // 0x15 DW_FORM_ref_udata + 0, // 0x16 DW_FORM_indirect +}; + +static const uint8_t form_sizes_addr8[] = { + 0, // 0x00 unused + 8, // 0x01 DW_FORM_addr + 0, // 0x02 unused + 0, // 0x03 DW_FORM_block2 + 0, // 0x04 DW_FORM_block4 + 2, // 0x05 DW_FORM_data2 + 4, // 0x06 DW_FORM_data4 + 8, // 0x07 DW_FORM_data8 + 0, // 0x08 DW_FORM_string + 0, // 0x09 DW_FORM_block + 0, // 0x0a DW_FORM_block1 + 1, // 0x0b DW_FORM_data1 + 1, // 0x0c DW_FORM_flag + 0, // 0x0d DW_FORM_sdata + 4, // 0x0e DW_FORM_strp + 0, // 0x0f DW_FORM_udata + 8, // 0x10 DW_FORM_ref_addr + 1, // 0x11 DW_FORM_ref1 + 2, // 0x12 DW_FORM_ref2 + 4, // 0x13 DW_FORM_ref4 + 8, // 0x14 DW_FORM_ref8 + 0, // 0x15 DW_FORM_ref_udata + 0, // 0x16 DW_FORM_indirect +}; + +const uint8_t * +DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) { + switch (addr_size) { + case 4: return form_sizes_addr4; + case 8: return form_sizes_addr8; + } + return NULL; +} + +bool +DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, + const DWARFCompileUnit *cu) { + bool indirect = false; + bool is_block = false; + Value.data = NULL; + // Read the value for the form into value and follow any DW_FORM_indirect + // instances we run into + do { + indirect = false; + switch (Form) { + case DW_FORM_addr: + case DW_FORM_ref_addr: + Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); + break; + case DW_FORM_block: + Value.uval = data.getULEB128(offset_ptr); + is_block = true; + break; + case DW_FORM_block1: + Value.uval = data.getU8(offset_ptr); + is_block = true; + break; + case DW_FORM_block2: + Value.uval = data.getU16(offset_ptr); + is_block = true; + break; + case DW_FORM_block4: + Value.uval = data.getU32(offset_ptr); + is_block = true; + break; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + Value.uval = data.getU8(offset_ptr); + break; + case DW_FORM_data2: + case DW_FORM_ref2: + Value.uval = data.getU16(offset_ptr); + break; + case DW_FORM_data4: + case DW_FORM_ref4: + Value.uval = data.getU32(offset_ptr); + break; + case DW_FORM_data8: + case DW_FORM_ref8: + Value.uval = data.getU64(offset_ptr); + break; + case DW_FORM_sdata: + Value.sval = data.getSLEB128(offset_ptr); + break; + case DW_FORM_strp: + Value.uval = data.getU32(offset_ptr); + break; + case DW_FORM_udata: + case DW_FORM_ref_udata: + Value.uval = data.getULEB128(offset_ptr); + break; + case DW_FORM_string: + Value.cstr = data.getCStr(offset_ptr); + // Set the string value to also be the data for inlined cstr form + // values only so we can tell the difference between DW_FORM_string + // and
DW_FORM_strp form values + Value.data = (uint8_t*)Value.cstr; + break; + case DW_FORM_indirect: + Form = data.getULEB128(offset_ptr); + indirect = true; + break; + default: + return false; + } + } while (indirect); + + if (is_block) { + StringRef str = data.getData().substr(*offset_ptr, Value.uval); + Value.data = NULL; + if (!str.empty()) { + Value.data = reinterpret_cast<const uint8_t *>(str.data()); + *offset_ptr += Value.uval; + } + } + + return true; +} + +bool +DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr, + const DWARFCompileUnit *cu) const { + return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu); +} + +bool +DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, const DWARFCompileUnit *cu) { + bool indirect = false; + do { + indirect = false; + switch (form) { + // Blocks if inlined data that have a length field and the data bytes + // inlined in the .debug_info + case DW_FORM_block: { + uint64_t size = debug_info_data.getULEB128(offset_ptr); + *offset_ptr += size; + return true; + } + case DW_FORM_block1: { + uint8_t size = debug_info_data.getU8(offset_ptr); + *offset_ptr += size; + return true; + } + case DW_FORM_block2: { + uint16_t size = debug_info_data.getU16(offset_ptr); + *offset_ptr += size; + return true; + } + case DW_FORM_block4: { + uint32_t size = debug_info_data.getU32(offset_ptr); + *offset_ptr += size; + return true; + } + + // Inlined NULL terminated C-strings + case DW_FORM_string: + debug_info_data.getCStr(offset_ptr); + return true; + + // Compile unit address sized values + case DW_FORM_addr: + case DW_FORM_ref_addr: + *offset_ptr += cu->getAddressByteSize(); + return true; + + // 1 byte values + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + *offset_ptr += 1; + return true; + + // 2 byte values + case DW_FORM_data2: + case DW_FORM_ref2: + *offset_ptr += 2; + return true; + + // 4 byte values + case DW_FORM_strp: + case DW_FORM_data4: + case DW_FORM_ref4: + *offset_ptr += 4; + return true; + + // 8 byte values + case DW_FORM_data8: + case DW_FORM_ref8: + *offset_ptr += 8; + return true; + + // signed or unsigned LEB 128 values + // case DW_FORM_APPLE_db_str: + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + debug_info_data.getULEB128(offset_ptr); + return true; + + case DW_FORM_indirect: + indirect = true; + form = debug_info_data.getULEB128(offset_ptr); + break; + default: + return false; + } + } while (indirect); + return true; +} + +void +DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { + DataExtractor debug_str_data(cu->getContext().getStringSection(), true, 0); + uint64_t uvalue = getUnsigned(); + bool cu_relative_offset = false; + + switch (Form) { + case DW_FORM_addr: OS << format("0x%016x", uvalue); break; + case DW_FORM_flag: + case DW_FORM_data1: OS << format("0x%02x", uvalue); break; + case DW_FORM_data2: OS << format("0x%04x", uvalue); break; + case DW_FORM_data4: OS << format("0x%08x", uvalue); break; + case DW_FORM_data8: OS << format("0x%016x", uvalue); break; + case DW_FORM_string: + OS << '"'; + OS.write_escaped(getAsCString(NULL)); + OS << '"'; + break; + case DW_FORM_block: + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + if (uvalue > 0) { + switch (Form) { + case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break; + case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; + case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); 
break; + case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break; + default: break; + } + + const uint8_t* data_ptr = Value.data; + if (data_ptr) { + // uvalue contains size of block + const uint8_t* end_data_ptr = data_ptr + uvalue; + while (data_ptr < end_data_ptr) { + OS << format("%2.2x ", *data_ptr); + ++data_ptr; + } + } + else + OS << "NULL"; + } + break; + + case DW_FORM_sdata: OS << getSigned(); break; + case DW_FORM_udata: OS << getUnsigned(); break; + case DW_FORM_strp: { + OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue); + const char* dbg_str = getAsCString(&debug_str_data); + if (dbg_str) { + OS << '"'; + OS.write_escaped(dbg_str); + OS << '"'; + } + break; + } + case DW_FORM_ref_addr: + OS << format("0x%016x", uvalue); + break; + case DW_FORM_ref1: + cu_relative_offset = true; + OS << format("cu + 0x%2.2x", (uint8_t)uvalue); + break; + case DW_FORM_ref2: + cu_relative_offset = true; + OS << format("cu + 0x%4.4x", (uint16_t)uvalue); + break; + case DW_FORM_ref4: + cu_relative_offset = true; + OS << format("cu + 0x%4.4x", (uint32_t)uvalue); + break; + case DW_FORM_ref8: + cu_relative_offset = true; + OS << format("cu + 0x%8.8llx", uvalue); + break; + case DW_FORM_ref_udata: + cu_relative_offset = true; + OS << format("cu + 0x%llx", uvalue); + break; + + // All DW_FORM_indirect attributes should be resolved prior to calling + // this function + case DW_FORM_indirect: + OS << "DW_FORM_indirect"; + break; + default: + OS << format("DW_FORM(0x%4.4x)", Form); + break; + } + + if (cu_relative_offset) + OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0))); +} + +const char* +DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const { + if (isInlinedCStr()) { + return Value.cstr; + } else if (debug_str_data_ptr) { + uint32_t offset = Value.uval; + return debug_str_data_ptr->getCStr(&offset); + } + return NULL; +} + +uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const { + uint64_t die_offset = Value.uval; + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: + die_offset += (cu ? 
cu->getOffset() : 0); + break; + default: + break; + } + + return die_offset; +} + +bool +DWARFFormValue::resolveCompileUnitReferences(const DWARFCompileUnit *cu) { + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: + Value.uval += cu->getOffset(); + Form = DW_FORM_ref_addr; + return true; + default: + break; + } + return false; +} + +const uint8_t *DWARFFormValue::BlockData() const { + if (!isInlinedCStr()) + return Value.data; + return NULL; +} + +bool DWARFFormValue::isBlockForm(uint16_t form) { + switch (form) { + case DW_FORM_block: + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + return true; + } + return false; +} + +bool DWARFFormValue::isDataForm(uint16_t form) { + switch (form) { + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_data1: + case DW_FORM_data2: + case DW_FORM_data4: + case DW_FORM_data8: + return true; + } + return false; +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h new file mode 100644 index 0000000..22ac011 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h @@ -0,0 +1,78 @@ +//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H +#define LLVM_DEBUGINFO_DWARFFORMVALUE_H + +#include "llvm/Support/DataExtractor.h" + +namespace llvm { + +class DWARFCompileUnit; +class raw_ostream; + +class DWARFFormValue { +public: + struct ValueType { + ValueType() : data(NULL) { + uval = 0; + } + + union { + uint64_t uval; + int64_t sval; + const char* cstr; + }; + const uint8_t* data; + }; + + enum { + eValueTypeInvalid = 0, + eValueTypeUnsigned, + eValueTypeSigned, + eValueTypeCStr, + eValueTypeBlock + }; + +private: + uint16_t Form; // Form for this value. + ValueType Value; // Contains all data for the form. + +public: + DWARFFormValue(uint16_t form = 0) : Form(form) {} + uint16_t getForm() const { return Form; } + const ValueType& value() const { return Value; } + void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const; + bool extractValue(DataExtractor data, uint32_t *offset_ptr, + const DWARFCompileUnit *cu); + bool isInlinedCStr() const { + return Value.data != NULL && Value.data == (uint8_t*)Value.cstr; + } + const uint8_t *BlockData() const; + uint64_t getReference(const DWARFCompileUnit* cu) const; + + /// Resolve any compile unit specific references so that we don't need + /// the compile unit at a later time in order to work with the form + /// value. 
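/// For example (offsets assumed for illustration): a DW_FORM_ref4 value of
/// 0x2a read from a compile unit whose header starts at .debug_info offset
/// 0x100 becomes the absolute offset 0x12a and the form is rewritten to
/// DW_FORM_ref_addr, so the reference stays usable once the compile unit is
/// gone.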
+ bool resolveCompileUnitReferences(const DWARFCompileUnit* cu); + uint64_t getUnsigned() const { return Value.uval; } + int64_t getSigned() const { return Value.sval; } + const char *getAsCString(const DataExtractor *debug_str_data_ptr) const; + bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, + const DWARFCompileUnit *cu) const; + static bool skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, const DWARFCompileUnit *cu); + static bool isBlockForm(uint16_t form); + static bool isDataForm(uint16_t form); + static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size); +}; + +} + +#endif diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index 7652090..525877b 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -93,7 +93,7 @@ public: /// \brief Returns the address the GlobalVariable should be written into. The /// GVMemoryBlock object prefixes that. static char *Create(const GlobalVariable *GV, const TargetData& TD) { - const Type *ElTy = GV->getType()->getElementType(); + Type *ElTy = GV->getType()->getElementType(); size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy); void *RawMemory = ::operator new( TargetData::RoundUpAlignment(sizeof(GVMemoryBlock), @@ -272,7 +272,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, Array = new char[(InputArgv.size()+1)*PtrSize]; DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); - const Type *SBytePtr = Type::getInt8PtrTy(C); + Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; @@ -361,8 +361,8 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, // Check main() type unsigned NumArgs = Fn->getFunctionType()->getNumParams(); - const FunctionType *FTy = Fn->getFunctionType(); - const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo(); + FunctionType *FTy = Fn->getFunctionType(); + Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo(); // Check the argument types. if (NumArgs > 3) @@ -422,6 +422,7 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode, + Reloc::Model RM, CodeModel::Model CMM) { if (ExecutionEngine::JITCtor == 0) { if (ErrorStr) @@ -436,9 +437,8 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, SmallVector<std::string, 1> MAttrs; TargetMachine *TM = - EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr); + EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr); if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0; - TM->setCodeModel(CMM); return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM); } @@ -465,10 +465,9 @@ ExecutionEngine *EngineBuilder::create() { // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. 
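// A minimal client-side sketch of how the relocation and code models reach
// this point (the EngineBuilder setter names are assumed, not part of this
// change):
//   ExecutionEngine *EE = EngineBuilder(M)
//                             .setEngineKind(EngineKind::JIT)
//                             .setRelocationModel(Reloc::Default)
//                             .setCodeModel(CodeModel::Default)
//                             .create();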
if (WhichEngine & EngineKind::JIT) { - if (TargetMachine *TM = - EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr)) { - TM->setCodeModel(CMModel); - + if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, + RelocModel, CMModel, + ErrorStr)) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel, @@ -548,8 +547,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // Compute the index GenericValue Result = getConstantValue(Op0); SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end()); - uint64_t Offset = - TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size()); + uint64_t Offset = TD->getIndexedOffset(Op0->getType(), Indices); char* tmp = (char*) Result.PointerVal; Result = PTOGV(tmp + Offset); @@ -651,7 +649,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::BitCast: { GenericValue GV = getConstantValue(Op0); - const Type* DestTy = CE->getType(); + Type* DestTy = CE->getType(); switch (Op0->getType()->getTypeID()) { default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: @@ -847,7 +845,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, } void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, - GenericValue *Ptr, const Type *Ty) { + GenericValue *Ptr, Type *Ty) { const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { @@ -909,7 +907,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { /// void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, - const Type *Ty) { + Type *Ty) { const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { @@ -932,7 +930,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, // FIXME: Will not trap if loading a signaling NaN. uint64_t y[2]; memcpy(y, Ptr, 10); - Result.IntVal = APInt(80, 2, y); + Result.IntVal = APInt(80, y); break; } default: @@ -986,7 +984,7 @@ void ExecutionEngine::emitGlobals() { // Loop over all of the global variables in the program, allocating the memory // to hold them. If there is more than one module, do a prepass over globals // to figure out how the different modules should link together. 
- std::map<std::pair<std::string, const Type*>, + std::map<std::pair<std::string, Type*>, const GlobalValue*> LinkedGlobalsMap; if (Modules.size() != 1) { @@ -1101,7 +1099,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { if (!GV->isThreadLocal()) InitializeMemory(GV->getInitializer(), GA); - const Type *ElTy = GV->getType()->getElementType(); + Type *ElTy = GV->getType()->getElementType(); size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); NumInitBytes += (unsigned)GVSize; ++NumGlobals; diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 498063b..27917da 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -51,7 +51,7 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) { break static void executeFAddInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { IMPLEMENT_BINARY_OPERATOR(+, Float); IMPLEMENT_BINARY_OPERATOR(+, Double); @@ -62,7 +62,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1, } static void executeFSubInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { IMPLEMENT_BINARY_OPERATOR(-, Float); IMPLEMENT_BINARY_OPERATOR(-, Double); @@ -73,7 +73,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1, } static void executeFMulInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { IMPLEMENT_BINARY_OPERATOR(*, Float); IMPLEMENT_BINARY_OPERATOR(*, Double); @@ -84,7 +84,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, } static void executeFDivInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { IMPLEMENT_BINARY_OPERATOR(/, Float); IMPLEMENT_BINARY_OPERATOR(/, Double); @@ -95,7 +95,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1, } static void executeFRemInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { case Type::FloatTyID: Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal); @@ -125,7 +125,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, break; static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(eq,Ty); @@ -138,7 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ne,Ty); @@ -151,7 +151,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ult,Ty); @@ -164,7 +164,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch 
(Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(slt,Ty); @@ -177,7 +177,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ugt,Ty); @@ -190,7 +190,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sgt,Ty); @@ -203,7 +203,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ule,Ty); @@ -216,7 +216,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sle,Ty); @@ -229,7 +229,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(uge,Ty); @@ -242,7 +242,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, } static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sge,Ty); @@ -256,7 +256,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, void Interpreter::visitICmpInst(ICmpInst &I) { ExecutionContext &SF = ECStack.back(); - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result @@ -286,7 +286,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { break static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(==, Float); @@ -299,7 +299,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(!=, Float); @@ -313,7 +313,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<=, Float); @@ -326,7 +326,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>=, Float); @@ -339,7 +339,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<, Float); @@ -352,7 +352,7 @@ static GenericValue 
executeFCMP_OLT(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>, Float); @@ -377,49 +377,49 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_OEQ(Src1, Src2, Ty); } static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_ONE(Src1, Src2, Ty); } static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_OLE(Src1, Src2, Ty); } static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_OGE(Src1, Src2, Ty); } static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_OLT(Src1, Src2, Ty); } static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) return executeFCMP_OGT(Src1, Src2, Ty); } static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && @@ -431,7 +431,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, - const Type *Ty) { + Type *Ty) { GenericValue Dest; if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || @@ -444,7 +444,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, void Interpreter::visitFCmpInst(FCmpInst &I) { ExecutionContext &SF = ECStack.back(); - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result @@ -475,7 +475,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { } static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, - GenericValue Src2, const Type *Ty) { + GenericValue Src2, Type *Ty) { GenericValue Result; switch (predicate) { case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty); @@ -520,7 +520,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, void Interpreter::visitBinaryOperator(BinaryOperator &I) { ExecutionContext &SF = ECStack.back(); - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result @@ -585,7 +585,7 @@ void Interpreter::exitCalled(GenericValue GV) { /// care of switching to the normal destination BB, if we are returning /// from an invoke. 
/// -void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, +void Interpreter::popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result) { // Pop the current stack frame. ECStack.pop_back(); @@ -613,7 +613,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, void Interpreter::visitReturnInst(ReturnInst &I) { ExecutionContext &SF = ECStack.back(); - const Type *RetTy = Type::getVoidTy(I.getContext()); + Type *RetTy = Type::getVoidTy(I.getContext()); GenericValue Result; // Save away the return value... (if we are not 'ret void') @@ -662,18 +662,21 @@ void Interpreter::visitBranchInst(BranchInst &I) { void Interpreter::visitSwitchInst(SwitchInst &I) { ExecutionContext &SF = ECStack.back(); - GenericValue CondVal = getOperandValue(I.getOperand(0), SF); - const Type *ElTy = I.getOperand(0)->getType(); + Value* Cond = I.getCondition(); + Type *ElTy = Cond->getType(); + GenericValue CondVal = getOperandValue(Cond, SF); // Check to see if any of the cases match... BasicBlock *Dest = 0; - for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2) - if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy) - .IntVal != 0) { - Dest = cast<BasicBlock>(I.getOperand(i+1)); + unsigned NumCases = I.getNumCases(); + // Skip the first item since that's the default case. + for (unsigned i = 1; i < NumCases; ++i) { + GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF); + if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) { + Dest = cast<BasicBlock>(I.getSuccessor(i)); break; } - + } if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default SwitchToNewBasicBlock(Dest, SF); } @@ -730,7 +733,7 @@ void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){ void Interpreter::visitAllocaInst(AllocaInst &I) { ExecutionContext &SF = ECStack.back(); - const Type *Ty = I.getType()->getElementType(); // Type to be allocated + Type *Ty = I.getType()->getElementType(); // Type to be allocated // Get the number of elements being allocated by the array... unsigned NumElements = @@ -767,7 +770,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, uint64_t Total = 0; for (; I != E; ++I) { - if (const StructType *STy = dyn_cast<StructType>(*I)) { + if (StructType *STy = dyn_cast<StructType>(*I)) { const StructLayout *SLO = TD.getStructLayout(STy); const ConstantInt *CPU = cast<ConstantInt>(I.getOperand()); @@ -775,7 +778,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, Total += SLO->getElementOffset(Index); } else { - const SequentialType *ST = cast<SequentialType>(*I); + SequentialType *ST = cast<SequentialType>(*I); // Get the index number for the array... which must be long type... 
GenericValue IdxGV = getOperandValue(I.getOperand(), SF); @@ -929,34 +932,34 @@ void Interpreter::visitAShr(BinaryOperator &I) { SetValue(&I, Dest, SF); } -GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - const IntegerType *DITy = cast<IntegerType>(DstTy); + IntegerType *DITy = cast<IntegerType>(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.trunc(DBitWidth); return Dest; } -GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - const IntegerType *DITy = cast<IntegerType>(DstTy); + IntegerType *DITy = cast<IntegerType>(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.sext(DBitWidth); return Dest; } -GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - const IntegerType *DITy = cast<IntegerType>(DstTy); + IntegerType *DITy = cast<IntegerType>(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.zext(DBitWidth); return Dest; } -GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && @@ -965,7 +968,7 @@ GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && @@ -974,9 +977,9 @@ GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); @@ -988,9 +991,9 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); @@ -1002,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = 
getOperandValue(SrcVal, SF); assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); @@ -1014,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); @@ -1027,7 +1030,7 @@ GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, } -GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); @@ -1037,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); @@ -1050,10 +1053,10 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, return Dest; } -GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, +GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (DstTy->isPointerTy()) { assert(SrcTy->isPointerTy() && "Invalid BitCast"); @@ -1155,7 +1158,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { GenericValue Dest; GenericValue Src = ECStack[VAList.UIntPairVal.first] .VarArgs[VAList.UIntPairVal.second]; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); switch (Ty->getTypeID()) { case Type::IntegerTyID: Dest.IntVal = Src.IntVal; IMPLEMENT_VAARG(Pointer); @@ -1222,7 +1225,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, GenericValue Op0 = getOperandValue(CE->getOperand(0), SF); GenericValue Op1 = getOperandValue(CE->getOperand(1), SF); GenericValue Dest; - const Type * Ty = CE->getOperand(0)->getType(); + Type * Ty = CE->getOperand(0)->getType(); switch (CE->getOpcode()) { case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break; case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break; diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index f7e2a4d..055875c 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -48,7 +48,7 @@ using namespace llvm; static ManagedStatic<sys::Mutex> FunctionsLock; -typedef GenericValue (*ExFunc)(const FunctionType *, +typedef GenericValue (*ExFunc)(FunctionType *, const std::vector<GenericValue> &); static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions; static std::map<std::string, ExFunc> FuncNames; @@ -60,7 +60,7 @@ static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions; static Interpreter *TheInterpreter; -static char getTypeID(const Type *Ty) { +static char getTypeID(Type *Ty) { switch 
(Ty->getTypeID()) { case Type::VoidTyID: return 'V'; case Type::IntegerTyID: @@ -91,7 +91,7 @@ static ExFunc lookupFunction(const Function *F) { // Function not found, look it up... start by figuring out what the // composite function name should be. std::string ExtName = "lle_"; - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); ExtName + "_" + F->getNameStr(); @@ -109,7 +109,7 @@ static ExFunc lookupFunction(const Function *F) { } #ifdef USE_LIBFFI -static ffi_type *ffiTypeFor(const Type *Ty) { +static ffi_type *ffiTypeFor(Type *Ty) { switch (Ty->getTypeID()) { case Type::VoidTyID: return &ffi_type_void; case Type::IntegerTyID: @@ -129,7 +129,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) { return NULL; } -static void *ffiValueFor(const Type *Ty, const GenericValue &AV, +static void *ffiValueFor(Type *Ty, const GenericValue &AV, void *ArgDataPtr) { switch (Ty->getTypeID()) { case Type::IntegerTyID: @@ -181,7 +181,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, const std::vector<GenericValue> &ArgVals, const TargetData *TD, GenericValue &Result) { ffi_cif cif; - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); const unsigned NumArgs = F->arg_size(); // TODO: We don't have type information about the remaining arguments, because @@ -197,7 +197,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { const unsigned ArgNo = A->getArgNo(); - const Type *ArgTy = FTy->getParamType(ArgNo); + Type *ArgTy = FTy->getParamType(ArgNo); args[ArgNo] = ffiTypeFor(ArgTy); ArgBytes += TD->getTypeStoreSize(ArgTy); } @@ -209,12 +209,12 @@ static bool ffiInvoke(RawFunc Fn, Function *F, for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { const unsigned ArgNo = A->getArgNo(); - const Type *ArgTy = FTy->getParamType(ArgNo); + Type *ArgTy = FTy->getParamType(ArgNo); values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr); ArgDataPtr += TD->getTypeStoreSize(ArgTy); } - const Type *RetTy = FTy->getReturnType(); + Type *RetTy = FTy->getReturnType(); ffi_type *rtype = ffiTypeFor(RetTy); if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { @@ -304,7 +304,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, extern "C" { // Don't add C++ manglings to llvm mangling :) // void atexit(Function*) -GenericValue lle_X_atexit(const FunctionType *FT, +GenericValue lle_X_atexit(FunctionType *FT, const std::vector<GenericValue> &Args) { assert(Args.size() == 1); TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0])); @@ -314,14 +314,14 @@ GenericValue lle_X_atexit(const FunctionType *FT, } // void exit(int) -GenericValue lle_X_exit(const FunctionType *FT, +GenericValue lle_X_exit(FunctionType *FT, const std::vector<GenericValue> &Args) { TheInterpreter->exitCalled(Args[0]); return GenericValue(); } // void abort(void) -GenericValue lle_X_abort(const FunctionType *FT, +GenericValue lle_X_abort(FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? //report_fatal_error("Interpreted program raised SIGABRT"); @@ -331,7 +331,7 @@ GenericValue lle_X_abort(const FunctionType *FT, // int sprintf(char *, const char *, ...) - a very rough implementation to make // output useful. 
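A minimal standalone sketch (not from the patch) of the lookup-name scheme used by lookupFunction above, which builds a mangled name from the prefix "lle_", one type-code character per contained type, and the function's own name; the lle_-prefixed host shims that follow (sprintf below, then printf, sscanf, scanf and fprintf) are the hand-written implementations such a lookup can land on. The helper name and the type-code letters here are made up for illustration and are not LLVM's getTypeID():

// Toy illustration only: the type codes 'I' and 'P' and the helper name are
// hypothetical stand-ins, not LLVM's getTypeID()/lookupFunction themselves.
#include <iostream>
#include <string>
#include <vector>

static std::string mangleExternalName(const std::string &FnName,
                                      const std::vector<char> &TypeCodes) {
  std::string ExtName = "lle_";
  for (char C : TypeCodes)     // one code per contained type, return type first
    ExtName += C;
  return ExtName + "_" + FnName;
}

int main() {
  // Pretend 'I' means i32 and 'P' means pointer; both codes are invented here.
  std::cout << mangleExternalName("sprintf", {'I', 'P', 'P'}) << "\n";
  // prints: lle_IPP_sprintf
}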
-GenericValue lle_X_sprintf(const FunctionType *FT, +GenericValue lle_X_sprintf(FunctionType *FT, const std::vector<GenericValue> &Args) { char *OutputBuffer = (char *)GVTOP(Args[0]); const char *FmtStr = (const char *)GVTOP(Args[1]); @@ -413,7 +413,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT, // int printf(const char *, ...) - a very rough implementation to make output // useful. -GenericValue lle_X_printf(const FunctionType *FT, +GenericValue lle_X_printf(FunctionType *FT, const std::vector<GenericValue> &Args) { char Buffer[10000]; std::vector<GenericValue> NewArgs; @@ -425,7 +425,7 @@ GenericValue lle_X_printf(const FunctionType *FT, } // int sscanf(const char *format, ...); -GenericValue lle_X_sscanf(const FunctionType *FT, +GenericValue lle_X_sscanf(FunctionType *FT, const std::vector<GenericValue> &args) { assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!"); @@ -440,7 +440,7 @@ GenericValue lle_X_sscanf(const FunctionType *FT, } // int scanf(const char *format, ...); -GenericValue lle_X_scanf(const FunctionType *FT, +GenericValue lle_X_scanf(FunctionType *FT, const std::vector<GenericValue> &args) { assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!"); @@ -456,7 +456,7 @@ GenericValue lle_X_scanf(const FunctionType *FT, // int fprintf(FILE *, const char *, ...) - a very rough implementation to make // output useful. -GenericValue lle_X_fprintf(const FunctionType *FT, +GenericValue lle_X_fprintf(FunctionType *FT, const std::vector<GenericValue> &Args) { assert(Args.size() >= 2); char Buffer[10000]; diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h index bfebe3d..ee2b459 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -174,7 +174,7 @@ public: void visitVAArgInst(VAArgInst &I); void visitInstruction(Instruction &I) { - errs() << I; + errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); } @@ -207,33 +207,33 @@ private: // Helper functions void initializeExternalFunctions(); GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF); GenericValue getOperandValue(Value *V, ExecutionContext &SF); - GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy, + GenericValue executeTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy, + GenericValue executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy, + GenericValue executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy, + GenericValue executeFPTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy, + GenericValue executeFPExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy, + GenericValue executeFPToUIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy, + GenericValue executeFPToSIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy, + GenericValue executeUIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeSIToFPInst(Value *SrcVal, 
const Type *DstTy, + GenericValue executeSIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy, + GenericValue executePtrToIntInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy, + GenericValue executeIntToPtrInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy, + GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal, - const Type *Ty, ExecutionContext &SF); - void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result); + Type *Ty, ExecutionContext &SF); + void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result); }; diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp index fa8bee4..2251a8e 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp @@ -52,6 +52,7 @@ static void runAtExitHandlers() { #include <sys/stat.h> #endif #include <fcntl.h> +#include <unistd.h> /* stat functions are redirecting to __xstat with a version number. On x86-64 * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp index 445d2d0..d773009 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp @@ -390,8 +390,8 @@ GenericValue JIT::runFunction(Function *F, void *FPtr = getPointerToFunction(F); assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); - const FunctionType *FTy = F->getFunctionType(); - const Type *RetTy = FTy->getReturnType(); + FunctionType *FTy = F->getFunctionType(); + Type *RetTy = FTy->getReturnType(); assert((FTy->getNumParams() == ArgValues.size() || (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && @@ -500,7 +500,7 @@ GenericValue JIT::runFunction(Function *F, SmallVector<Value*, 8> Args; for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { Constant *C = 0; - const Type *ArgTy = FTy->getParamType(i); + Type *ArgTy = FTy->getParamType(i); const GenericValue &AV = ArgValues[i]; switch (ArgTy->getTypeID()) { default: llvm_unreachable("Unknown argument type for function call!"); @@ -788,7 +788,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { // be allocated into the same buffer, but in general globals are allocated // through the memory manager which puts them near the code but not in the // same buffer. 
- const Type *GlobalType = GV->getType()->getElementType(); + Type *GlobalType = GV->getType()->getElementType(); size_t S = getTargetData()->getTypeAllocSize(GlobalType); size_t A = getTargetData()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h index b879fc3..92dcb0e 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h @@ -100,9 +100,10 @@ public: CodeGenOpt::Level OptLevel = CodeGenOpt::Default, bool GVsWithCode = true, - CodeModel::Model CMM = CodeModel::Default) { + Reloc::Model RM = Reloc::Default, + CodeModel::Model CMM = CodeModel::JITDefault) { return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, - CMM); + RM, CMM); } virtual void addModule(Module *M); diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index ddb0d54..8f84ac7 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -18,12 +18,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" @@ -45,7 +45,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, TD = TM.getTargetData(); stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection(); RI = TM.getRegisterInfo(); - TFI = TM.getFrameLowering(); + MAI = TM.getMCAsmInfo(); JCE = &jce; unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, @@ -523,9 +523,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); } - std::vector<MachineMove> Moves; - TFI->getInitialFrameState(Moves); - EmitFrameMoves(0, Moves); + EmitFrameMoves(0, MAI->getInitialFrameState()); JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop); diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index e1d0045..8dc99ab 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -22,8 +22,8 @@ class JITCodeEmitter; class MachineFunction; class MachineModuleInfo; class MachineMove; +class MCAsmInfo; class TargetData; -class TargetFrameLowering; class TargetMachine; class TargetRegisterInfo; @@ -31,7 +31,7 @@ class JITDwarfEmitter { const TargetData* TD; JITCodeEmitter* JCE; const TargetRegisterInfo* RI; - const TargetFrameLowering *TFI; + const MCAsmInfo *MAI; MachineModuleInfo* MMI; JIT& Jit; bool stackGrowthDirection; diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp index d046b8a..24020ee 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -668,6 +668,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { DEBUG(dbgs() << "JIT: Lazily resolving function '" << 
F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); + (void)ActualPtr; Result = JR->TheJIT->getPointerToFunction(F); } @@ -770,7 +771,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, MachineConstantPoolEntry CPE = Constants[i]; unsigned AlignMask = CPE.getAlignment() - 1; Size = (Size + AlignMask) & ~AlignMask; - const Type *Ty = CPE.getType(); + Type *Ty = CPE.getType(); Size += TD->getTypeAllocSize(Ty); } return Size; @@ -1098,7 +1099,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; dbgs().write_hex(CAddr) << "]\n"); - const Type *Ty = CPE.Val.ConstVal->getType(); + Type *Ty = CPE.Val.ConstVal->getType(); Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty); } } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt deleted file mode 100644 index f7ed176..0000000 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMCJIT - MCJIT.cpp - TargetSelect.cpp - ) diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp index e431c84..f83f428 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp @@ -52,6 +52,7 @@ static void runAtExitHandlers() { #include <sys/stat.h> #endif #include <fcntl.h> +#include <unistd.h> /* stat functions are redirecting to __xstat with a version number. On x86-64 * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 4475f4d..7c8a740 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -59,6 +59,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, bool AllocateGVsWithCode) : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { + setTargetData(TM->getTargetData()); PM.add(new TargetData(*TM->getTargetData())); // Turn the machine code intermediate representation into bytes in memory @@ -124,8 +125,8 @@ GenericValue MCJIT::runFunction(Function *F, void *FPtr = getPointerToFunction(F); assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); - const FunctionType *FTy = F->getFunctionType(); - const Type *RetTy = FTy->getReturnType(); + FunctionType *FTy = F->getFunctionType(); + Type *RetTy = FTy->getReturnType(); assert((FTy->getNumParams() == ArgValues.size() || (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && @@ -216,6 +217,6 @@ GenericValue MCJIT::runFunction(Function *F, } } - assert("Full-featured argument passing not supported yet!"); + assert(0 && "Full-featured argument passing not supported yet!"); return GenericValue(); } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile deleted file mode 100644 index 967efbc..0000000 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMMCJIT - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt deleted file mode 100644 index 9e53f87..0000000 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMRuntimeDyld - RuntimeDyld.cpp - ) diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile deleted file mode 100644 index 5d6f26d..0000000 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMRuntimeDyld - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index bcdfb04..7190a3c 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -143,7 +143,7 @@ public: bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const { return isKnownFormat(InputBuffer); - }; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp index f51aff3..004b865 100644 --- a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp +++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp @@ -17,11 +17,11 @@ #include "llvm/Module.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// selectTarget - Pick a target either via -march or by guessing the native @@ -30,6 +30,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, StringRef MArch, StringRef MCPU, const SmallVectorImpl<std::string>& MAttrs, + Reloc::Model RM, + CodeModel::Model CM, std::string *ErrorStr) { Triple TheTriple(Mod->getTargetTriple()); if (TheTriple.getTriple().empty()) @@ -83,8 +85,9 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, } // Allocate a target... 
- TargetMachine *Target = - TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr); + TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), + MCPU, FeaturesStr, + RM, CM); assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp index 55aa9bf..03a962e 100644 --- a/contrib/llvm/lib/Linker/LinkModules.cpp +++ b/contrib/llvm/lib/Linker/LinkModules.cpp @@ -14,9 +14,12 @@ #include "llvm/Linker.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -139,7 +142,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { return false; } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) { StructType *SSTy = cast<StructType>(SrcTy); - if (DSTy->isAnonymous() != SSTy->isAnonymous() || + if (DSTy->isLiteral() != SSTy->isLiteral() || DSTy->isPacked() != SSTy->isPacked()) return false; } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) { @@ -223,7 +226,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { // If this is not a named struct type, then just map all of the elements and // then rebuild the type from inside out. - if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) { + if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) { // If there are no element types to map, then the type is itself. This is // true for the anonymous {} struct, things like 'float', integers, etc. if (Ty->getNumContainedTypes() == 0) @@ -261,7 +264,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { cast<PointerType>(Ty)->getAddressSpace()); case Type::FunctionTyID: return *Entry = FunctionType::get(ElementTypes[0], - ArrayRef<Type*>(ElementTypes).slice(1), + makeArrayRef(ElementTypes).slice(1), cast<FunctionType>(Ty)->isVarArg()); case Type::StructTyID: // Note that this is only reached for anonymous structs. @@ -302,7 +305,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { // Otherwise we create a new type and resolve its body later. This will be // resolved by the top level of get(). DefinitionsToResolve.push_back(STy); - return *Entry = StructType::createNamed(STy->getContext(), ""); + return *Entry = StructType::create(STy->getContext()); } @@ -333,10 +336,16 @@ namespace { std::vector<AppendingVarInfo> AppendingVars; + unsigned Mode; // Mode to treat source module. + + // Set of items not to link in from source. + SmallPtrSet<const Value*, 16> DoNotLinkFromSource; + public: std::string ErrorMsg; - ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { } + ModuleLinker(Module *dstM, Module *srcM, unsigned mode) + : DstM(dstM), SrcM(srcM), Mode(mode) { } bool run(); @@ -596,9 +605,9 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); DstGV->eraseFromParent(); - // Zap the initializer in the source variable so we don't try to link it. - SrcGV->setInitializer(0); - SrcGV->setLinkage(GlobalValue::ExternalLinkage); + // Track the source variable so we don't try to link it. + DoNotLinkFromSource.insert(SrcGV); + return false; } @@ -633,11 +642,10 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { // Make sure to remember this mapping. 
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType())); - // Destroy the source global's initializer (and convert it to a prototype) - // so that we don't attempt to copy it over when processing global - // initializers. - SGV->setInitializer(0); - SGV->setLinkage(GlobalValue::ExternalLinkage); + // Track the source global so that we don't attempt to copy it over when + // processing global initializers. + DoNotLinkFromSource.insert(SGV); + return false; } } @@ -682,8 +690,10 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { // Make sure to remember this mapping. ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType())); - // Remove the body from the source module so we don't attempt to remap it. - SF->deleteBody(); + // Track the function from the source module so we don't attempt to remap + // it. + DoNotLinkFromSource.insert(SF); + return false; } } @@ -722,8 +732,9 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { // Make sure to remember this mapping. ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType())); - // Remove the body from the source module so we don't attempt to remap it. - SGA->setAliasee(0); + // Track the alias from the source module so we don't attempt to remap it. + DoNotLinkFromSource.insert(SGA); + return false; } } @@ -779,7 +790,9 @@ void ModuleLinker::linkGlobalInits() { // Loop over all of the globals in the src module, mapping them over as we go for (Module::const_global_iterator I = SrcM->global_begin(), E = SrcM->global_end(); I != E; ++I) { - if (!I->hasInitializer()) continue; // Only process initialized GV's. + + // Only process initialized GV's or ones not already in dest. + if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue; // Grab destination global variable. GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]); @@ -805,31 +818,42 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { ValueMap[I] = DI; } - // Splice the body of the source function into the dest function. - Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); - - // At this point, all of the instructions and values of the function are now - // copied over. The only problem is that they are still referencing values in - // the Source function as operands. Loop through all of the operands of the - // functions and patch them up to point to the local versions. - for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap); - + if (Mode == Linker::DestroySource) { + // Splice the body of the source function into the dest function. + Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); + + // At this point, all of the instructions and values of the function are now + // copied over. The only problem is that they are still referencing values in + // the Source function as operands. Loop through all of the operands of the + // functions and patch them up to point to the local versions. + for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap); + + } else { + // Clone the body of the function into the dest function. + SmallVector<ReturnInst*, 8> Returns; // Ignore returns. 
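A minimal standalone sketch (not from the patch) of the move-versus-copy decision the linker hunks above introduce: instead of mutating the source module (clearing initializers, deleting bodies, nulling aliasees), entries are recorded in a DoNotLinkFromSource set and skipped later, and linkFunctionBody now honors a Mode where DestroySource keeps the old splice path while any other mode clones the body and leaves the source intact. The type and mode names below are toy stand-ins, not the LLVM classes:

// Toy illustration only; "Module", "link" and the mode names are stand-ins.
// DestroySource moves bodies out of Src; PreserveSource copies them, and the
// do-not-link set marks entries that must not be pulled from the source.
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

enum Mode { DestroySource, PreserveSource };

struct Module { std::vector<std::string> Bodies; };

static void link(Module &Dst, Module &Src, Mode M,
                 const std::set<std::string> &DoNotLink) {
  for (std::string &B : Src.Bodies) {
    if (DoNotLink.count(B))
      continue;                           // already resolved in Dst; skip it
    if (M == DestroySource)
      Dst.Bodies.push_back(std::move(B)); // splice: Src gives the body up
    else
      Dst.Bodies.push_back(B);            // clone: Src keeps its copy
  }
  if (M == DestroySource)
    Src.Bodies.clear();
}

int main() {
  Module Dst, Src{{"f", "g"}};
  link(Dst, Src, PreserveSource, {"g"});
  std::cout << Dst.Bodies.size() << " linked, "
            << Src.Bodies.size() << " left in source\n"; // 1 linked, 2 left
}

The copy path costs duplicated bodies but leaves Src usable (or linkable again) after the call, which is the point of tracking rather than destroying.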
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns); + } + // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); I != E; ++I) ValueMap.erase(I); + } void ModuleLinker::linkAliasBodies() { for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end(); - I != E; ++I) + I != E; ++I) { + if (DoNotLinkFromSource.count(I)) + continue; if (Constant *Aliasee = I->getAliasee()) { GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]); DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap)); } + } } /// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest @@ -891,16 +915,10 @@ bool ModuleLinker::run() { StringRef ModuleId = SrcM->getModuleIdentifier(); if (!ModuleId.empty()) DstM->removeLibrary(sys::path::stem(ModuleId)); - // Loop over all of the linked values to compute type mappings. computeTypeMapping(); - // Remap all of the named mdnoes in Src into the DstM module. We do this - // after linking GlobalValues so that MDNodes that reference GlobalValues - // are properly remapped. - linkNamedMDNodes(); - // Insert all of the globals in src into the DstM module... without linking // initializers (which could refer to functions not yet mapped over). for (Module::global_iterator I = SrcM->global_begin(), @@ -933,7 +951,17 @@ bool ModuleLinker::run() { // Link in the function bodies that are defined in the source module into // DstM. for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) { - if (SF->isDeclaration()) continue; // No body if function is external. + + // Skip if not linking from source. + if (DoNotLinkFromSource.count(SF)) continue; + + // Skip if no body (function is external) or materialize. + if (SF->isDeclaration()) { + if (!SF->isMaterializable()) + continue; + if (SF->Materialize(&ErrorMsg)) + return true; + } linkFunctionBody(cast<Function>(ValueMap[SF]), SF); } @@ -941,6 +969,11 @@ bool ModuleLinker::run() { // Resolve all uses of aliases with aliasees. linkAliasBodies(); + // Remap all of the named mdnoes in Src into the DstM module. We do this + // after linking GlobalValues so that MDNodes that reference GlobalValues + // are properly remapped. + linkNamedMDNodes(); + // Now that all of the types from the source are used, resolve any structs // copied over to the dest that didn't exist there. TypeMap.linkDefinedTypeBodies(); @@ -957,8 +990,9 @@ bool ModuleLinker::run() { // error occurs, true is returned and ErrorMsg (if not null) is set to indicate // the problem. Upon failure, the Dest module could be in a modified state, and // shouldn't be relied on to be consistent. -bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { - ModuleLinker TheLinker(Dest, Src); +bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, + std::string *ErrorMsg) { + ModuleLinker TheLinker(Dest, Src, Mode); if (TheLinker.run()) { if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; return true; diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp index fba91da..59fbceb 100644 --- a/contrib/llvm/lib/Linker/Linker.cpp +++ b/contrib/llvm/lib/Linker/Linker.cpp @@ -141,6 +141,14 @@ static inline sys::Path IsLibrary(StringRef Name, if (FullPath.isBitcodeFile()) // .so file containing bitcode? return FullPath; + // Try libX form, to make it possible to add dependency on the + // specific version of .so, like liblzma.so.1.0.0 + FullPath.eraseSuffix(); + if (FullPath.isDynamicLibrary()) // Native shared library? 
+ return FullPath; + if (FullPath.isBitcodeFile()) // .so file containing bitcode? + return FullPath; + // Not found .. fall through // Indicate that the library was not found in the directory. diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp index 59e1b8e..3d16de5 100644 --- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -23,13 +24,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ELF.h" -#include "llvm/Target/TargetAsmBackend.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" -#include "../Target/X86/X86FixupKinds.h" -#include "../Target/ARM/ARMFixupKinds.h" +#include "../Target/X86/MCTargetDesc/X86FixupKinds.h" +#include "../Target/ARM/MCTargetDesc/ARMFixupKinds.h" +#include "../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h" #include <vector> using namespace llvm; @@ -124,12 +125,12 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize, // e_shnum = # of section header ents if (NumberOfSections >= ELF::SHN_LORESERVE) - Write16(0); + Write16(ELF::SHN_UNDEF); else Write16(NumberOfSections); // e_shstrndx = Section # of '.shstrtab' - if (NumberOfSections >= ELF::SHN_LORESERVE) + if (ShstrtabIndex >= ELF::SHN_LORESERVE) Write16(ELF::SHN_XINDEX); else Write16(ShstrtabIndex); @@ -301,7 +302,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL || Section.getType() == ELF::SHT_STRTAB || - Section.getType() == ELF::SHT_SYMTAB) + Section.getType() == ELF::SHT_SYMTAB || + Section.getType() == ELF::SHT_SYMTAB_SHNDX) continue; WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false); @@ -447,8 +449,16 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + adjustFixupOffset(Fixup, RelocOffset); + if (!hasRelocationAddend()) Addend = 0; + + if (is64Bit()) + assert(isInt<64>(Addend)); + else + assert(isInt<32>(Addend)); + ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend); Relocations[Fragment->getParent()].push_back(ERE); } @@ -656,6 +666,9 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, ExternalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + if (NumRegularSections > ELF::SHN_LORESERVE) + NeedsSymtabShndx = true; } void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm, @@ -992,11 +1005,10 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, // Nothing to do. break; - case ELF::SHT_GROUP: { + case ELF::SHT_GROUP: sh_link = SymbolTableIndex; sh_info = GroupSymbolIndex; break; - } default: assert(0 && "FIXME: sh_type value not supported!"); @@ -1224,7 +1236,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, FileOff = OS.tell(); - // ... and then the remainting sections ... + // ... and then the remaining sections ... 
for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) WriteDataSectionData(Asm, Layout, *Sections[i]); } @@ -1252,6 +1264,11 @@ MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break; case ELF::EM_MBLAZE: return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break; + case ELF::EM_PPC: + case ELF::EM_PPC64: + return new PPCELFObjectWriter(MOTW, OS, IsLittleEndian); break; + case ELF::EM_MIPS: + return new MipsELFObjectWriter(MOTW, OS, IsLittleEndian); break; default: llvm_unreachable("Unsupported architecture"); break; } } @@ -1503,6 +1520,76 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return Type; } +//===- PPCELFObjectWriter -------------------------------------------===// + +PPCELFObjectWriter::PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian) + : ELFObjectWriter(MOTW, _OS, IsLittleEndian) { +} + +PPCELFObjectWriter::~PPCELFObjectWriter() { +} + +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) { + // determine the type of the relocation + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_REL24; + break; + case FK_PCRel_4: + Type = ELF::R_PPC_REL32; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_ADDR24; + break; + case PPC::fixup_ppc_brcond14: + Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + break; + case PPC::fixup_ppc_ha16: + Type = ELF::R_PPC_ADDR16_HA; + break; + case PPC::fixup_ppc_lo16: + Type = ELF::R_PPC_ADDR16_LO; + break; + case PPC::fixup_ppc_lo14: + Type = ELF::R_PPC_ADDR14; + break; + case FK_Data_4: + Type = ELF::R_PPC_ADDR32; + break; + case FK_Data_2: + Type = ELF::R_PPC_ADDR16; + break; + } + } + return Type; +} + +void +PPCELFObjectWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { + switch ((unsigned)Fixup.getKind()) { + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + RelocOffset += 2; + break; + default: + break; + } +} + //===- MBlazeELFObjectWriter -------------------------------------------===// MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, @@ -1624,7 +1711,6 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, default: llvm_unreachable("invalid fixup kind!"); case FK_Data_8: Type = ELF::R_X86_64_64; break; case X86::reloc_signed_4byte: - assert(isInt<32>(Target.getConstant())); switch (Modifier) { default: llvm_unreachable("Unimplemented"); @@ -1728,3 +1814,19 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } + +MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian) + : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {} + +MipsELFObjectWriter::~MipsELFObjectWriter() {} + +unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) { + // tbd + return 1; +} diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.h b/contrib/llvm/lib/MC/ELFObjectWriter.h index 7593099..862b085 100644 --- a/contrib/llvm/lib/MC/ELFObjectWriter.h +++ b/contrib/llvm/lib/MC/ELFObjectWriter.h @@ -347,6 +347,7 @@ class ELFObjectWriter : public 
MCObjectWriter { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) = 0; + virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } }; //===- X86ELFObjectWriter -------------------------------------------===// @@ -395,6 +396,22 @@ class ELFObjectWriter : public MCObjectWriter { }; + //===- PPCELFObjectWriter -------------------------------------------===// + + class PPCELFObjectWriter : public ELFObjectWriter { + public: + PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~PPCELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + }; + //===- MBlazeELFObjectWriter -------------------------------------------===// class MBlazeELFObjectWriter : public ELFObjectWriter { @@ -409,6 +426,21 @@ class ELFObjectWriter : public MCObjectWriter { bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend); }; + + //===- MipsELFObjectWriter -------------------------------------------===// + + class MipsELFObjectWriter : public ELFObjectWriter { + public: + MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~MipsELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; } #endif diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/MCAsmBackend.cpp index 1927557..2c150f4 100644 --- a/contrib/llvm/lib/MC/TargetAsmBackend.cpp +++ b/contrib/llvm/lib/MC/MCAsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==// +//===-- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------==// // // The LLVM Compiler Infrastructure // @@ -7,19 +7,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/MC/MCAsmBackend.h" using namespace llvm; -TargetAsmBackend::TargetAsmBackend() +MCAsmBackend::MCAsmBackend() : HasReliableSymbolDifference(false) { } -TargetAsmBackend::~TargetAsmBackend() { +MCAsmBackend::~MCAsmBackend() { } const MCFixupKindInfo & -TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { +MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { { "FK_Data_1", 0, 8, 0 }, { "FK_Data_2", 0, 16, 0 }, diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp index 502b60b..95861bc 100644 --- a/contrib/llvm/lib/MC/MCAsmInfo.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp @@ -29,6 +29,7 @@ MCAsmInfo::MCAsmInfo() { HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; HasMachoTBSSDirective = false; + StructorOutputOrder = Structors::ReversePriorityOrder; HasStaticCtorDtorReferenceInStaticMode = false; LinkerRequiresNonEmptyDwarfLines = false; MaxInstLength = 4; @@ -42,6 +43,9 @@ MCAsmInfo::MCAsmInfo() { LinkerPrivateGlobalPrefix = ""; InlineAsmStart = "APP"; InlineAsmEnd = "NO_APP"; + Code16Directive = ".code16"; + Code32Directive = ".code32"; + Code64Directive = ".code64"; AssemblerDialect = 0; AllowQuotesInName = false; AllowNameToStartWithDigit = false; @@ -53,6 +57,12 @@ MCAsmInfo::MCAsmInfo() { Data16bitsDirective = "\t.short\t"; 
Data32bitsDirective = "\t.long\t"; Data64bitsDirective = "\t.quad\t"; + DataBegin = "$d."; + CodeBegin = "$a."; + JT8Begin = "$d."; + JT16Begin = "$d."; + JT32Begin = "$d."; + SupportsDataRegions = false; SunStyleELFSectionSwitchSyntax = false; UsesELFSectionDirectiveForBSS = false; AlignDirective = "\t.align\t"; @@ -62,7 +72,7 @@ MCAsmInfo::MCAsmInfo() { GlobalDirective = "\t.globl\t"; HasSetDirective = true; HasAggressiveSymbolFolding = true; - HasLCOMMDirective = false; + LCOMMDirectiveType = LCOMM::None; COMMDirectiveAlignmentIsInBytes = true; HasDotTypeDotSizeDirective = true; HasSingleParameterDotFile = true; diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp index 7fc7d7a..434d910 100644 --- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -19,7 +19,7 @@ using namespace llvm; MCAsmInfoCOFF::MCAsmInfoCOFF() { GlobalPrefix = "_"; COMMDirectiveAlignmentIsInBytes = false; - HasLCOMMDirective = true; + LCOMMDirectiveType = LCOMM::ByteAlignment; HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; PrivateGlobalPrefix = "L"; // Prefix for private global symbols @@ -27,11 +27,14 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { LinkOnceDirective = "\t.linkonce discard\n"; // Doesn't support visibility: - HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; + HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid; + ProtectedVisibilityAttr = MCSA_Invalid; // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) SupportsDebugInformation = true; DwarfSectionOffsetDirective = "\t.secrel32\t"; HasMicrosoftFastStdCallMangling = true; + + SupportsDataRegions = false; } diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp index 5851cb0..b20e338 100644 --- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp @@ -39,8 +39,16 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill HasMachoTBSSDirective = true; // Uses .tbss + StructorOutputOrder = Structors::PriorityOrder; HasStaticCtorDtorReferenceInStaticMode = true; + CodeBegin = "L$start$code$"; + DataBegin = "L$start$data$"; + JT8Begin = "L$start$jt8$"; + JT16Begin = "L$start$jt16$"; + JT32Begin = "L$start$jt32$"; + SupportsDataRegions = true; + // FIXME: Darwin 10 and newer don't need this. 
LinkerRequiresNonEmptyDwarfLines = true; diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp index d5d08e8..3fcbb05 100644 --- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -15,8 +15,12 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -25,9 +29,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetAsmBackend.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include <cctype> using namespace llvm; @@ -40,7 +41,7 @@ protected: private: OwningPtr<MCInstPrinter> InstPrinter; OwningPtr<MCCodeEmitter> Emitter; - OwningPtr<TargetAsmBackend> AsmBackend; + OwningPtr<MCAsmBackend> AsmBackend; SmallString<128> CommentToEmit; raw_svector_ostream CommentStream; @@ -63,7 +64,7 @@ public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *printer, MCCodeEmitter *emitter, - TargetAsmBackend *asmbackend, + MCAsmBackend *asmbackend, bool showInst) : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), @@ -157,7 +158,9 @@ public: /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + /// @param Size - The alignment of the common symbol in bytes. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); @@ -334,8 +337,9 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { default: assert(0 && "Invalid flag!"); case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break; case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break; - case MCAF_Code16: OS << "\t.code\t16"; break; - case MCAF_Code32: OS << "\t.code\t32"; break; + case MCAF_Code16: OS << '\t'<< MAI.getCode16Directive(); break; + case MCAF_Code32: OS << '\t'<< MAI.getCode32Directive(); break; + case MCAF_Code64: OS << '\t'<< MAI.getCode64Directive(); break; } EmitEOL(); } @@ -482,9 +486,16 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. 
-void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { - assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!"); +void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) { + assert(MAI.getLCOMMDirectiveType() != LCOMM::None && + "Doesn't have .lcomm, can't emit it!"); OS << "\t.lcomm\t" << *Symbol << ',' << Size; + if (ByteAlign > 1) { + assert(MAI.getLCOMMDirectiveType() == LCOMM::ByteAlignment && + "Alignment not supported on .lcomm!"); + OS << ',' << ByteAlign; + } EmitEOL(); } @@ -827,8 +838,8 @@ void MCAsmStreamer::EmitCFIEndProc() { void MCAsmStreamer::EmitRegisterName(int64_t Register) { if (InstPrinter && !MAI.useDwarfRegNumForCFI()) { - const TargetAsmInfo &TAI = getContext().getTargetAsmInfo(); - unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true); + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + unsigned LLVMRegister = MRI.getLLVMRegNum(Register, true); InstPrinter->printRegName(OS, LLVMRegister); } else { OS << Register; @@ -994,6 +1005,19 @@ void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind, EmitEOL(); } +static const MCSection *getWin64EHTableSection(StringRef suffix, + MCContext &context) { + // FIXME: This doesn't belong in MCObjectFileInfo. However, + /// this duplicate code in MCWin64EH.cpp. + if (suffix == "") + return context.getObjectFileInfo()->getXDataSection(); + return context.getCOFFSection((".xdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + void MCAsmStreamer::EmitWin64EHHandlerData() { MCStreamer::EmitWin64EHHandlerData(); @@ -1003,8 +1027,7 @@ void MCAsmStreamer::EmitWin64EHHandlerData() { // data block is visible. MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo(); StringRef suffix=MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function); - const MCSection *xdataSect = - getContext().getTargetAsmInfo().getWin64EHTableSection(suffix); + const MCSection *xdataSect = getWin64EHTableSection(suffix, getContext()); if (xdataSect) SwitchSectionNoChange(xdataSect); @@ -1221,7 +1244,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { // If we have an AsmPrinter, use that to print, otherwise print the MCInst. 
if (InstPrinter) - InstPrinter->printInst(&Inst, OS); + InstPrinter->printInst(&Inst, OS, ""); else Inst.print(OS, &MAI); EmitEOL(); @@ -1249,8 +1272,8 @@ MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, - MCCodeEmitter *CE, TargetAsmBackend *TAB, + MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI, - IP, CE, TAB, ShowInst); + IP, CE, MAB, ShowInst); } diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp index 527a63c..06c8aec 100644 --- a/contrib/llvm/lib/MC/MCAssembler.cpp +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -25,8 +26,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -194,7 +194,7 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, /* *** */ -MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_, +MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, raw_ostream &OS_) : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_), diff --git a/contrib/llvm/lib/MC/MCAtom.cpp b/contrib/llvm/lib/MC/MCAtom.cpp new file mode 100644 index 0000000..d714443 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAtom.cpp @@ -0,0 +1,97 @@ +//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCModule.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) { + assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!"); + + assert(Address < End+Size && + "Instruction not contiguous with end of atom!"); + if (Address > End) + Parent->remap(this, Begin, End+Size); + + Text.push_back(std::make_pair(Address, I)); +} + +void MCAtom::addData(const MCData &D) { + assert(Type == DataAtom && "Trying to add MCData to a non-data atom!"); + Parent->remap(this, Begin, End+1); + + Data.push_back(D); +} + +MCAtom *MCAtom::split(uint64_t SplitPt) { + assert((SplitPt > Begin && SplitPt <= End) && + "Splitting at point not contained in atom!"); + + // Compute the new begin/end points. + uint64_t LeftBegin = Begin; + uint64_t LeftEnd = SplitPt - 1; + uint64_t RightBegin = SplitPt; + uint64_t RightEnd = End; + + // Remap this atom to become the lower of the two new ones. + Parent->remap(this, LeftBegin, LeftEnd); + + // Create a new atom for the higher atom. + MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd); + + // Split the contents of the original atom between it and the new one. The + // precise method depends on whether this is a data or a text atom. 
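// Editorial sketch (not part of this change), assuming a module M that owns
// this atom:
//   MCAtom *A  = M->createAtom(MCAtom::DataAtom, 0x100, 0x107);
//   MCAtom *Hi = A->split(0x104);  // A now covers [0x100,0x103],
//                                  // Hi covers [0x104,0x107]
// The branches below divide the stored contents between the two atoms.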
+ if (isDataAtom()) { + std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin); + + assert(I != Data.end() && "Split point not found in range!"); + + std::copy(I, Data.end(), RightAtom->Data.end()); + Data.erase(I, Data.end()); + } else if (isTextAtom()) { + std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin(); + + while (I != Text.end() && I->first < SplitPt) ++I; + + assert(I != Text.end() && "Split point not found in disassembly!"); + assert(I->first == SplitPt && + "Split point does not fall on instruction boundary!"); + + std::copy(I, Text.end(), RightAtom->Text.end()); + Text.erase(I, Text.end()); + } else + llvm_unreachable("Unknown atom type!"); + + return RightAtom; +} + +void MCAtom::truncate(uint64_t TruncPt) { + assert((TruncPt >= Begin && TruncPt < End) && + "Truncation point not contained in atom!"); + + Parent->remap(this, Begin, TruncPt); + + if (isDataAtom()) { + Data.resize(TruncPt - Begin + 1); + } else if (isTextAtom()) { + std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin(); + + while (I != Text.end() && I->first <= TruncPt) ++I; + + assert(I != Text.end() && "Truncation point not found in disassembly!"); + assert(I->first == TruncPt+1 && + "Truncation point does not fall on instruction boundary"); + + Text.erase(I, Text.end()); + } else + llvm_unreachable("Unknown atom type!"); +} + diff --git a/contrib/llvm/lib/MC/MCCodeGenInfo.cpp b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp new file mode 100644 index 0000000..236e7de --- /dev/null +++ b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp @@ -0,0 +1,21 @@ +//===-- MCCodeGenInfo.cpp - Target CodeGen Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file tracks information about the target which can affect codegen, +// asm parsing, and asm printing. For example, relocation model. 
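// (Editorial sketch, not part of this change: typical initialization would be
//    MCCodeGenInfo CGI;
//    CGI.InitMCCodeGenInfo(Reloc::PIC_, CodeModel::Small);
//  recording the relocation and code models that this class exposes.)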
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCCodeGenInfo.h" +using namespace llvm; + +void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM) { + RelocationModel = RM; + CMModel = CM; +} diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp index 8faa72e..82690ee 100644 --- a/contrib/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -9,13 +9,14 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCLabel.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ELF.h" @@ -26,8 +27,9 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; -MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) : - MAI(mai), TAI(tai), +MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri, + const MCObjectFileInfo *mofi) : + MAI(mai), MRI(mri), MOFI(mofi), Allocator(), Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0), CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), @@ -54,8 +56,6 @@ MCContext::~MCContext() { // If the stream for the .secure_log_unique directive was created free it. delete (raw_ostream*)SecureLog; - - delete TAI; } //===----------------------------------------------------------------------===// @@ -279,7 +279,8 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) { } else { StringRef Directory = Slash.first; Name = Slash.second; - for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) { + DirIndex = 0; + for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) { if (Directory == MCDwarfDirs[DirIndex]) break; } diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp index 5480b4b..16e66dc 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -1,4 +1,4 @@ -//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===// +//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===// // // The LLVM Compiler Infrastructure // @@ -11,15 +11,14 @@ #include "llvm-c/Disassembler.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmInfo.h" // FIXME. -#include "llvm/Target/TargetMachine.h" // FIXME. -#include "llvm/Target/TargetSelect.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" namespace llvm { class Target; @@ -38,10 +37,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, LLVMSymbolLookupCallback SymbolLookUp) { // Initialize targets and assembly printers/parsers. llvm::InitializeAllTargetInfos(); - // FIXME: We shouldn't need to initialize the Target(Machine)s. 
- llvm::InitializeAllTargets(); - llvm::InitializeAllMCAsmInfos(); - llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllTargetMCs(); llvm::InitializeAllAsmParsers(); llvm::InitializeAllDisassemblers(); @@ -54,41 +50,38 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName); assert(MAI && "Unable to create target asm info!"); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName); + assert(MRI && "Unable to create target register info!"); + // Package up features to be passed to target/subtarget std::string FeaturesStr; std::string CPU; - // FIXME: We shouldn't need to do this (and link in codegen). - // When we split this out, we should do it in a way that makes - // it straightforward to switch subtargets on the fly. - TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU, - FeaturesStr); - assert(TM && "Unable to create target machine!"); - - // Get the target assembler info needed to setup the context. - const TargetAsmInfo *tai = new TargetAsmInfo(*TM); - assert(tai && "Unable to create target assembler!"); + const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, CPU, + FeaturesStr); + assert(STI && "Unable to create subtarget info!"); // Set up the MCContext for creating symbols and MCExpr's. - MCContext *Ctx = new MCContext(*MAI, tai); + MCContext *Ctx = new MCContext(*MAI, *MRI, 0); assert(Ctx && "Unable to create MCContext!"); // Set up disassembler. - MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); assert(DisAsm && "Unable to create disassembler!"); - DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, - *MAI); + *MAI, *STI); assert(IP && "Unable to create instruction printer!"); LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType, GetOpInfo, SymbolLookUp, - TheTarget, MAI, TM, tai, Ctx, - DisAsm, IP); + TheTarget, MAI, MRI, + Ctx, DisAsm, IP); assert(DC && "Allocation failure!"); + return DC; } @@ -147,18 +140,35 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, MCInst Inst; const MCDisassembler *DisAsm = DC->getDisAsm(); MCInstPrinter *IP = DC->getIP(); - if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls())) + MCDisassembler::DecodeStatus S; + S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC, + /*REMOVE*/ nulls(), DC->CommentStream); + switch (S) { + case MCDisassembler::Fail: + case MCDisassembler::SoftFail: + // FIXME: Do something different for soft failure modes? return 0; - SmallVector<char, 64> InsnStr; - raw_svector_ostream OS(InsnStr); - IP->printInst(&Inst, OS); - OS.flush(); + case MCDisassembler::Success: { + DC->CommentStream.flush(); + StringRef Comments = DC->CommentsToEmit.str(); - assert(OutStringSize != 0 && "Output buffer cannot be zero size"); - size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); - std::memcpy(OutString, InsnStr.data(), OutputSize); - OutString[OutputSize] = '\0'; // Terminate string. + SmallVector<char, 64> InsnStr; + raw_svector_ostream OS(InsnStr); + IP->printInst(&Inst, OS, Comments); + OS.flush(); - return Size; + // Tell the comment stream that the vector changed underneath it. 
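// (Editorial elaboration, not part of this change: raw_svector_ostream::resync()
//  refreshes the stream's cached pointers after its backing SmallString has been
//  modified behind its back, as the clear() call below does.)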
+ DC->CommentsToEmit.clear(); + DC->CommentStream.resync(); + + assert(OutStringSize != 0 && "Output buffer cannot be zero size"); + size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); + std::memcpy(OutString, InsnStr.data(), OutputSize); + OutString[OutputSize] = '\0'; // Terminate string. + + return Size; + } + } + return 0; } diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h index f0ec42a..238ff7d 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h @@ -20,15 +20,16 @@ #include "llvm-c/Disassembler.h" #include <string> #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { -class TargetAsmInfo; class MCContext; class MCAsmInfo; class MCDisassembler; class MCInstPrinter; +class MCRegisterInfo; class Target; -class TargetMachine; // // This is the disassembler context returned by LLVMCreateDisasm(). @@ -58,12 +59,8 @@ private: const Target *TheTarget; // The assembly information for the target architecture. llvm::OwningPtr<const llvm::MCAsmInfo> MAI; - // The target machine instance. - llvm::OwningPtr<llvm::TargetMachine> TM; - // The disassembler for the target architecture. - // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc - // error when this LLVMDisasmContext is deleted. - const TargetAsmInfo *Tai; + // The register information for the target architecture. + llvm::OwningPtr<const llvm::MCRegisterInfo> MRI; // The assembly context for creating symbols and MCExprs. llvm::OwningPtr<const llvm::MCContext> Ctx; // The disassembler for the target architecture. @@ -72,22 +69,28 @@ private: llvm::OwningPtr<llvm::MCInstPrinter> IP; public: + // Comment stream and backing vector. 
+ SmallString<128> CommentsToEmit; + raw_svector_ostream CommentStream; + LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType, LLVMOpInfoCallback getOpInfo, LLVMSymbolLookupCallback symbolLookUp, const Target *theTarget, const MCAsmInfo *mAI, - llvm::TargetMachine *tM, const TargetAsmInfo *tai, + const MCRegisterInfo *mRI, llvm::MCContext *ctx, const MCDisassembler *disAsm, MCInstPrinter *iP) : TripleName(tripleName), DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), - SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) { - TM.reset(tM); + SymbolLookUp(symbolLookUp), TheTarget(theTarget), + CommentStream(CommentsToEmit) { MAI.reset(mAI); + MRI.reset(mRI); Ctx.reset(ctx); DisAsm.reset(disAsm); IP.reset(iP); } const MCDisassembler *getDisAsm() const { return DisAsm.get(); } + const MCAsmInfo *getAsmInfo() const { return MAI.get(); } MCInstPrinter *getIP() { return IP.get(); } }; diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp index bdd99af..83362a2 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -22,20 +22,19 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCTargetAsmLexer.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetAsmParser.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" using namespace llvm; bool EDDisassembler::sInitialized = false; @@ -106,9 +105,7 @@ void EDDisassembler::initialize() { sInitialized = true; InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllMCAsmInfos(); - InitializeAllAsmPrinters(); + InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllDisassemblers(); } @@ -169,24 +166,24 @@ EDDisassembler::EDDisassembler(CPUKey &key) : if (!Tgt) return; - std::string CPU; - std::string featureString; - TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU, - featureString)); + MRI.reset(Tgt->createMCRegInfo(tripleString)); - const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); - - if (!registerInfo) + if (!MRI) return; - - initMaps(*registerInfo); + + initMaps(*MRI); AsmInfo.reset(Tgt->createMCAsmInfo(tripleString)); if (!AsmInfo) return; - Disassembler.reset(Tgt->createMCDisassembler()); + STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", "")); + + if (!STI) + return; + + Disassembler.reset(Tgt->createMCDisassembler(*STI)); if (!Disassembler) return; @@ -195,16 +192,16 @@ EDDisassembler::EDDisassembler(CPUKey &key) : InstString.reset(new std::string); InstStream.reset(new raw_string_ostream(*InstString)); - InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI)); if (!InstPrinter) return; GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); - 
SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo)); SpecificAsmLexer->InstallLexer(*GenericAsmLexer); - initMaps(*TargetMachine->getRegisterInfo()); + initMaps(*MRI); Valid = true; } @@ -247,14 +244,17 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, MCInst* inst = new MCInst; uint64_t byteSize; - if (!Disassembler->getInstruction(*inst, - byteSize, - memoryObject, - address, - ErrorStream)) { + MCDisassembler::DecodeStatus S; + S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address, + ErrorStream, nulls()); + switch (S) { + case MCDisassembler::Fail: + case MCDisassembler::SoftFail: + // FIXME: Do something different on soft failure mode? delete inst; return NULL; - } else { + + case MCDisassembler::Success: { const llvm::EDInstInfo *thisInstInfo = NULL; if (InstInfos) { @@ -264,9 +264,11 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); return sdInst; } + } + return NULL; } -void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { +void EDDisassembler::initMaps(const MCRegisterInfo ®isterInfo) { unsigned numRegisters = registerInfo.getNumRegs(); unsigned registerIndex; @@ -325,7 +327,7 @@ bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { int EDDisassembler::printInst(std::string &str, MCInst &inst) { PrinterMutex.acquire(); - InstPrinter->printInst(&inst, *InstStream); + InstPrinter->printInst(&inst, *InstStream, ""); InstStream->flush(); str = *InstString; InstString->clear(); @@ -368,16 +370,16 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, SourceMgr sourceMgr; sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this)); sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over - MCContext context(*AsmInfo, NULL); + MCContext context(*AsmInfo, *MRI, NULL); OwningPtr<MCStreamer> streamer(createNullStreamer(context)); - OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr, + OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr, context, *streamer, *AsmInfo)); StringRef triple = tripleFromArch(Key.Arch); OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", "")); - OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI, - *genericParser)); + OwningPtr<MCTargetAsmParser> + TargetParser(Tgt->createMCAsmParser(*STI, *genericParser)); AsmToken OpcodeToken = genericParser->Lex(); AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h index 11d69c1..38c2203 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h +++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h @@ -29,24 +29,23 @@ namespace llvm { class AsmLexer; +class AsmParser; class AsmToken; class MCContext; class MCAsmInfo; class MCAsmLexer; -class AsmParser; -class TargetAsmLexer; -class TargetAsmParser; class MCDisassembler; class MCInstPrinter; class MCInst; class MCParsedAsmOperand; +class MCRegisterInfo; class MCStreamer; class MCSubtargetInfo; +class MCTargetAsmLexer; +class MCTargetAsmParser; template <typename T> class SmallVectorImpl; class SourceMgr; class Target; -class TargetMachine; -class TargetRegisterInfo; struct EDInstInfo; struct EDInst; @@ -136,10 +135,12 @@ struct EDDisassembler { CPUKey Key; /// The LLVM 
target corresponding to the disassembler const llvm::Target *Tgt; - /// The target machine instance. - llvm::OwningPtr<llvm::TargetMachine> TargetMachine; /// The assembly information for the target architecture llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo; + /// The subtarget information for the target architecture + llvm::OwningPtr<const llvm::MCSubtargetInfo> STI; + // The register information for the target architecture. + llvm::OwningPtr<const llvm::MCRegisterInfo> MRI; /// The disassembler for the target architecture llvm::OwningPtr<const llvm::MCDisassembler> Disassembler; /// The output string for the instruction printer; must be guarded with @@ -160,7 +161,7 @@ struct EDDisassembler { /// The target-specific lexer for use in tokenizing strings, in /// target-independent and target-specific portions llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer; - llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer; + llvm::OwningPtr<llvm::MCTargetAsmLexer> SpecificAsmLexer; /// The guard for the above llvm::sys::Mutex ParserMutex; /// The LLVM number used for the target disassembly syntax variant @@ -216,7 +217,7 @@ struct EDDisassembler { /// info /// /// @arg registerInfo - the register information to use as a source - void initMaps(const llvm::TargetRegisterInfo ®isterInfo); + void initMaps(const llvm::MCRegisterInfo ®isterInfo); /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a /// register for a given register ID, or NULL on failure /// diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h index ceb9505..6b78dc8 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h +++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h @@ -73,7 +73,7 @@ struct EDInst { std::string String; /// The order in which operands from the InstInfo's operand information appear /// in String - const char* OperandOrder; + const signed char* OperandOrder; /// The result of the parseOperands() function CachedResult ParseResult; diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp index de770b4..5f6c9df 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp @@ -87,14 +87,18 @@ int EDToken::registerID(unsigned ®isterID) const { int EDToken::tokenize(std::vector<EDToken*> &tokens, std::string &str, - const char *operandOrder, + const signed char *operandOrder, EDDisassembler &disassembler) { SmallVector<MCParsedAsmOperand*, 5> parsedOperands; SmallVector<AsmToken, 10> asmTokens; if (disassembler.parseInst(parsedOperands, asmTokens, str)) + { + for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i) + delete parsedOperands[i]; return -1; - + } + SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator; unsigned int operandIndex; SmallVectorImpl<AsmToken>::iterator tokenIterator; diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h index ba46707..384079b 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h +++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h @@ -125,7 +125,7 @@ struct EDToken { // assembly syntax static int tokenize(std::vector<EDToken*> &tokens, std::string &str, - const char *operandOrder, + const signed char *operandOrder, EDDisassembler &disassembler); /// getString - Directs a character pointer to the string, returning 0 on diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index ad86db1..4658a30 100644 --- 
a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -7,17 +7,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -196,7 +197,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, MCOS->EmitLabel(SectionEnd); // Switch back the the dwarf line section. - MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection()); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, @@ -209,7 +210,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, void MCDwarfFileTable::Emit(MCStreamer *MCOS) { MCContext &context = MCOS->getContext(); // Switch to the section where the table will be emitted into. - MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection()); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); // Create a symbol at the beginning of this section. MCSymbol *LineStartSym = context.CreateTempSymbol(); @@ -485,11 +486,11 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, } static const MachineLocation TranslateMachineLocation( - const TargetAsmInfo &TAI, + const MCRegisterInfo &MRI, const MachineLocation &Loc) { unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? MachineLocation::VirtualFP : - unsigned(TAI.getDwarfRegNum(Loc.getReg(), true)); + unsigned(MRI.getDwarfRegNum(Loc.getReg(), true)); const MachineLocation &NewLoc = Loc.isReg() ? MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); return NewLoc; @@ -503,10 +504,11 @@ namespace { bool IsEH; const MCSymbol *SectionStart; public: - FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) : - CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH), - SectionStart(sectionStart) { - } + FrameEmitterImpl(bool usingCFI, bool isEH) + : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH), + SectionStart(0) {} + + void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } /// EmitCompactUnwind - Emit the unwind information in a compact way. If /// we're successful, return 'true'. Otherwise, return 'false' and it will @@ -687,11 +689,8 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, /// normal CIE and FDE. 
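// Editorial note (not part of this change): the compact unwind encoding is no
// longer derived from TargetAsmInfo here; whoever builds the frame info now
// stores it in MCDwarfFrameInfo::CompactUnwindEncoding, and EmitCompactUnwind
// below only reads and emits that value.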
bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, const MCDwarfFrameInfo &Frame) { -#if 1 - return false; -#else MCContext &Context = Streamer.getContext(); - const TargetAsmInfo &TAI = Context.getTargetAsmInfo(); + const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); bool VerboseAsm = Streamer.isVerboseAsm(); // range-start range-length compact-unwind-enc personality-func lsda @@ -716,19 +715,17 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // .quad __gxx_personality // .quad except_tab1 - uint32_t Encoding = - TAI.getCompactUnwindEncoding(Frame.Instructions, - getDataAlignmentFactor(Streamer), IsEH); + uint32_t Encoding = Frame.CompactUnwindEncoding; if (!Encoding) return false; // The encoding needs to know we have an LSDA. if (Frame.Lsda) Encoding |= 0x40000000; - Streamer.SwitchSection(TAI.getCompactUnwindSection()); + Streamer.SwitchSection(MOFI->getCompactUnwindSection()); // Range Start - unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI); + unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI); unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); if (VerboseAsm) Streamer.AddComment("Range Start"); Streamer.EmitSymbolValue(Frame.Function, Size); @@ -745,6 +742,7 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Twine(llvm::utohexstr(Encoding))); Streamer.EmitIntValue(Encoding, Size); + // Personality Function Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr); if (VerboseAsm) Streamer.AddComment("Personality Function"); @@ -762,7 +760,6 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Streamer.EmitIntValue(0, Size); // No LSDA return true; -#endif } const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, @@ -771,11 +768,12 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, const MCSymbol *lsda, unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); - const TargetAsmInfo &TAI = context.getTargetAsmInfo(); + const MCRegisterInfo &MRI = context.getRegisterInfo(); + const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); MCSymbol *sectionStart; - if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH) + if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH) sectionStart = context.CreateTempSymbol(); else sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); @@ -824,7 +822,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true)); + streamer.EmitULEB128IntValue(MRI.getDwarfRegNum(MRI.getRARegister(), true)); // Augmentation Data Length (optional) @@ -858,21 +856,22 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding"); // Encoding of the FDE pointers - EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI), + EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI), "FDE Encoding"); } // Initial Instructions - const std::vector<MachineMove> &Moves = TAI.getInitialFrameState(); + const MCAsmInfo &MAI = context.getAsmInfo(); + const std::vector<MachineMove> &Moves = MAI.getInitialFrameState(); std::vector<MCCFIInstruction> Instructions; for (int i = 0, n = Moves.size(); i != n; ++i) { MCSymbol *Label = Moves[i].getLabel(); const MachineLocation &Dst = - TranslateMachineLocation(TAI, Moves[i].getDestination()); + TranslateMachineLocation(MRI, Moves[i].getDestination()); 
const MachineLocation &Src = - TranslateMachineLocation(TAI, Moves[i].getSource()); + TranslateMachineLocation(MRI, Moves[i].getSource()); MCCFIInstruction Inst(Label, Dst, Src); Instructions.push_back(Inst); } @@ -893,10 +892,10 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, MCContext &context = streamer.getContext(); MCSymbol *fdeStart = context.CreateTempSymbol(); MCSymbol *fdeEnd = context.CreateTempSymbol(); - const TargetAsmInfo &TAI = context.getTargetAsmInfo(); + const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); - if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) { + if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) { MCSymbol *EHSym = context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh")); streamer.EmitEHSymAttributes(frame.Function, EHSym); @@ -925,7 +924,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, streamer.EmitSymbolValue(&cieStart, 4); } - unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI); + unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI); unsigned size = getSizeForEncoding(streamer, fdeEncoding); // PC Begin @@ -1011,26 +1010,34 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, bool UsingCFI, bool IsEH) { MCContext &Context = Streamer.getContext(); - const TargetAsmInfo &TAI = Context.getTargetAsmInfo(); - const MCSection &Section = IsEH ? *TAI.getEHFrameSection() : - *TAI.getDwarfFrameSection(); + MCObjectFileInfo *MOFI = + const_cast<MCObjectFileInfo*>(Context.getObjectFileInfo()); + FrameEmitterImpl Emitter(UsingCFI, IsEH); + ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos(); + + // Emit the compact unwind info if available. + // FIXME: This emits both the compact unwind and the old CIE/FDE + // information. Only one of those is needed. + if (IsEH && MOFI->getCompactUnwindSection()) + for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); + if (!Frame.CompactUnwindEncoding) + Emitter.EmitCompactUnwind(Streamer, Frame); + } + + const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() : + *MOFI->getDwarfFrameSection(); Streamer.SwitchSection(&Section); MCSymbol *SectionStart = Context.CreateTempSymbol(); Streamer.EmitLabel(SectionStart); + Emitter.setSectionStart(SectionStart); MCSymbol *FDEEnd = NULL; DenseMap<CIEKey, const MCSymbol*> CIEStarts; - FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart); const MCSymbol *DummyDebugKey = NULL; - for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { - const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); - if (IsEH && TAI.getCompactUnwindSection() && - Emitter.EmitCompactUnwind(Streamer, Frame)) { - FDEEnd = NULL; - continue; - } - + for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) { + const MCDwarfFrameInfo &Frame = FrameArray[i]; CIEKey Key(Frame.Personality, Frame.PersonalityEncoding, Frame.LsdaEncoding); const MCSymbol *&CIEStart = IsEH ? 
CIEStarts[Key] : DummyDebugKey; diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp index 2c3f8e8..dad2e7b 100644 --- a/contrib/llvm/lib/MC/MCELF.cpp +++ b/contrib/llvm/lib/MC/MCELF.cpp @@ -16,7 +16,6 @@ #include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/Support/ELF.h" -#include "llvm/Target/TargetAsmBackend.h" namespace llvm { diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp index 49340ed..9ada08e 100644 --- a/contrib/llvm/lib/MC/MCELFStreamer.cpp +++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp @@ -21,11 +21,11 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; @@ -53,8 +53,9 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { case MCAF_SyntaxUnified: return; // no-op here. - case MCAF_Code16: return; // no-op here. - case MCAF_Code32: return; // no-op here. + case MCAF_Code16: return; // Change parsing mode; no-op here. + case MCAF_Code32: return; // Change parsing mode; no-op here. + case MCAF_Code64: return; // Change parsing mode; no-op here. case MCAF_SubsectionsViaSymbols: getAssembler().setSubsectionsViaSymbols(true); return; @@ -219,14 +220,14 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, SD.setSize(MCConstantExpr::Create(Size, getContext())); } -void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { +void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { // FIXME: Should this be caught and done earlier? MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); BindingExplicitlySet.insert(Symbol); - // FIXME: ByteAlignment is not needed here, but is required. 
- EmitCommonSymbol(Symbol, Size, 1); + EmitCommonSymbol(Symbol, Size, ByteAlignment); } void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { @@ -374,10 +375,10 @@ void MCELFStreamer::Finish() { this->MCObjectStreamer::Finish(); } -MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB, +MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll, bool NoExecStack) { - MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE); + MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); if (RelaxAll) S->getAssembler().setRelaxAll(true); if (NoExecStack) diff --git a/contrib/llvm/lib/MC/MCELFStreamer.h b/contrib/llvm/lib/MC/MCELFStreamer.h index 855e7e9..10bf775 100644 --- a/contrib/llvm/lib/MC/MCELFStreamer.h +++ b/contrib/llvm/lib/MC/MCELFStreamer.h @@ -25,11 +25,11 @@ namespace llvm { class MCELFStreamer : public MCObjectStreamer { public: - MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter) : MCObjectStreamer(Context, TAB, OS, Emitter) {} - MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, MCAssembler *Assembler) : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {} @@ -74,7 +74,8 @@ public: SD.setSize(Value); } - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0) { diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index fcf1aab..da297fb 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -18,7 +18,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp index 81a939f..2317a28 100644 --- a/contrib/llvm/lib/MC/MCInstPrinter.cpp +++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; MCInstPrinter::~MCInstPrinter() { @@ -23,3 +25,12 @@ StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { assert(0 && "Target should implement this"); } + +void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) { + if (!Annot.empty()) { + if (CommentStream) + (*CommentStream) << Annot; + else + OS << " " << MAI.getCommentString() << " " << Annot; + } +} diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp new file mode 100644 index 0000000..7736702 --- /dev/null +++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp @@ -0,0 +1,21 @@ +//===-- MCInstrAnalysis.cpp - InstrDesc target hooks ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInstrAnalysis.h" +using namespace llvm; + +uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + if (Inst.getNumOperands() == 0 || + Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(0).getImm(); + return Addr+Size+Imm; +} diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp index 309752e..3fe8ac7 100644 --- a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp +++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp @@ -133,9 +133,10 @@ public: return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); } - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { LogCall("EmitLocalCommonSymbol"); - return Child->EmitLocalCommonSymbol(Symbol, Size); + return Child->EmitLocalCommonSymbol(Symbol, Size, ByteAlignment); } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp index 1b21249..aa35815 100644 --- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -20,10 +20,10 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; @@ -34,9 +34,9 @@ private: virtual void EmitInstToData(const MCInst &Inst); public: - MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} + : MCObjectStreamer(Context, MAB, OS, Emitter) {} /// @name MCStreamer Interface /// @{ @@ -67,7 +67,8 @@ public: virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(0 && "macho doesn't support this directive"); } - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { assert(0 && "macho doesn't support this directive"); } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, @@ -143,8 +144,9 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { // Do any generic stuff we need to do. switch (Flag) { case MCAF_SyntaxUnified: return; // no-op here. - case MCAF_Code16: return; // no-op here. - case MCAF_Code32: return; // no-op here. + case MCAF_Code16: return; // Change parsing mode; no-op here. + case MCAF_Code32: return; // Change parsing mode; no-op here. + case MCAF_Code64: return; // Change parsing mode; no-op here. 
case MCAF_SubsectionsViaSymbols: getAssembler().setSubsectionsViaSymbols(true); return; @@ -207,8 +209,8 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_ELF_TypeCommon: case MCSA_ELF_TypeNoType: case MCSA_ELF_TypeGnuUniqueObject: - case MCSA_IndirectSymbol: case MCSA_Hidden: + case MCSA_IndirectSymbol: case MCSA_Internal: case MCSA_Protected: case MCSA_Weak: @@ -410,10 +412,10 @@ void MCMachOStreamer::Finish() { this->MCObjectStreamer::Finish(); } -MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, +MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll) { - MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE); + MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/contrib/llvm/lib/MC/MCModule.cpp b/contrib/llvm/lib/MC/MCModule.cpp new file mode 100644 index 0000000..b1d09d9 --- /dev/null +++ b/contrib/llvm/lib/MC/MCModule.cpp @@ -0,0 +1,45 @@ +//===- lib/MC/MCModule.cpp - MCModule implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCModule.h" + +using namespace llvm; + +MCAtom *MCModule::createAtom(MCAtom::AtomType Type, + uint64_t Begin, uint64_t End) { + assert(Begin < End && "Creating MCAtom with endpoints reversed?"); + + // Check for atoms already covering this range. + IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin); + assert((!I.valid() || I.start() < End) && "Offset range already occupied!"); + + // Create the new atom and add it to our maps. + MCAtom *NewAtom = new MCAtom(Type, this, Begin, End); + AtomAllocationTracker.insert(NewAtom); + OffsetMap.insert(Begin, End, NewAtom); + return NewAtom; +} + +// remap - Update the interval mapping for an atom. +void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { + // Find and erase the old mapping. + IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin); + assert(I.valid() && "Atom offset not found in module!"); + assert(*I == Atom && "Previous atom mapping was invalid!"); + I.erase(); + + // Insert the new mapping. + OffsetMap.insert(NewBegin, NewEnd, Atom); + + // Update the atom internal bounds. 
+ Atom->Begin = NewBegin; + Atom->End = NewEnd; +} + diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp index 9577af0..a6c0adb 100644 --- a/contrib/llvm/lib/MC/MCNullStreamer.cpp +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -59,8 +59,8 @@ namespace { virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} - + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0) {} virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp new file mode 100644 index 0000000..df8b99d --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -0,0 +1,554 @@ +//===-- MObjectFileInfo.cpp - Object File Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/ADT/Triple.h" +using namespace llvm; + +void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { + // MachO + IsFunctionEHFrameSymbolPrivate = false; + SupportsWeakOmittedEHFrame = false; + + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel + | dwarf::DW_EH_PE_sdata4; + LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + + // .comm doesn't support alignment before Leopard. + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) + CommDirectiveSupportsAlignment = false; + + TextSection // .text + = Ctx->getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = Ctx->getMachOSection("__DATA", "__data", 0, + SectionKind::getDataRel()); + + TLSDataSection // .tdata + = Ctx->getMachOSection("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR, + SectionKind::getDataRel()); + TLSBSSSection // .tbss + = Ctx->getMachOSection("__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + SectionKind::getThreadBSS()); + + // TODO: Verify datarel below. 
+ TLSTLVSection // .tlv + = Ctx->getMachOSection("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES, + SectionKind::getDataRel()); + + TLSThreadInitSection + = Ctx->getMachOSection("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, + SectionKind::getDataRel()); + + CStringSection // .cstring + = Ctx->getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = Ctx->getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = Ctx->getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = Ctx->getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. + SixteenByteConstantSection = 0; + if (RelocM != Reloc::Static && + T.getArch() != Triple::x86_64 && T.getArch() != Triple::ppc64) + SixteenByteConstantSection = // .literal16 + Ctx->getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = Ctx->getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); + + TextCoalSection + = Ctx->getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = Ctx->getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getReadOnly()); + ConstDataSection // .const_data + = Ctx->getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = Ctx->getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = Ctx->getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = Ctx->getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = Ctx->getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = Ctx->getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (RelocM == Reloc::Static) { + StaticCtorSection + = Ctx->getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); + StaticDtorSection + = Ctx->getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); + } else { + StaticCtorSection + = Ctx->getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = Ctx->getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); + + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + CompactUnwindSection = + Ctx->getMachOSection("__LD", "__compact_unwind", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + Ctx->getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + Ctx->getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + Ctx->getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + Ctx->getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + Ctx->getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + Ctx->getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + Ctx->getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + Ctx->getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + Ctx->getMachOSection("__DWARF", "__debug_ranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + Ctx->getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + Ctx->getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + + TLSExtraDataSection = TLSTLVSection; +} + +void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { + if (T.getArch() == Triple::x86) { + PersonalityEncoding = (RelocM == Reloc::PIC_) + ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + LSDAEncoding = (RelocM == Reloc::PIC_) + ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_) + ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + TTypeEncoding = (RelocM == Reloc::PIC_) + ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + } else if (T.getArch() == Triple::x86_64) { + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + if (RelocM == Reloc::PIC_) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) + ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); + LSDAEncoding = dwarf::DW_EH_PE_pcrel | + (CMModel == CodeModel::Small + ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) + ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); + } else { + PersonalityEncoding = + (CMModel == CodeModel::Small || CMModel == CodeModel::Medium) + ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + LSDAEncoding = (CMModel == CodeModel::Small) + ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + FDEEncoding = dwarf::DW_EH_PE_udata4; + TTypeEncoding = (CMModel == CodeModel::Small) + ? 
dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + } + } + + // ELF + BSSSection = + Ctx->getELFSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + Ctx->getELFSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + Ctx->getELFSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + Ctx->getELFSection(".tdata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, + SectionKind::getThreadData()); + + TLSBSSSection = + Ctx->getELFSection(".tbss", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, + SectionKind::getThreadBSS()); + + DataRelSection = + Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + Ctx->getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + Ctx->getELFSection(".rodata.cst16", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + Ctx->getELFSection(".gcc_except_table", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + // Debug Info Sections. 
+ DwarfAbbrevSection = + Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { + // COFF + TextSection = + Ctx->getCOFFSection(".text", + COFF::IMAGE_SCN_CNT_CODE | + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getText()); + DataSection = + Ctx->getCOFFSection(".data", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + ReadOnlySection = + Ctx->getCOFFSection(".rdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + StaticCtorSection = + Ctx->getCOFFSection(".ctors", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + StaticDtorSection = + Ctx->getCOFFSection(".dtors", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + Ctx->getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + + // Debug info. 
+ DwarfAbbrevSection = + Ctx->getCOFFSection(".debug_abbrev", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfInfoSection = + Ctx->getCOFFSection(".debug_info", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLineSection = + Ctx->getCOFFSection(".debug_line", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfFrameSection = + Ctx->getCOFFSection(".debug_frame", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getCOFFSection(".debug_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubTypesSection = + Ctx->getCOFFSection(".debug_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfStrSection = + Ctx->getCOFFSection(".debug_str", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLocSection = + Ctx->getCOFFSection(".debug_loc", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfARangesSection = + Ctx->getCOFFSection(".debug_aranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfRangesSection = + Ctx->getCOFFSection(".debug_ranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + Ctx->getCOFFSection(".debug_macinfo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + + DrectveSection = + Ctx->getCOFFSection(".drectve", + COFF::IMAGE_SCN_LNK_INFO, + SectionKind::getMetadata()); + + PDataSection = + Ctx->getCOFFSection(".pdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + XDataSection = + Ctx->getCOFFSection(".xdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + +void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, + CodeModel::Model cm, + MCContext &ctx) { + RelocM = relocm; + CMModel = cm; + Ctx = &ctx; + + // Common. + CommDirectiveSupportsAlignment = true; + SupportsWeakOmittedEHFrame = true; + IsFunctionEHFrameSymbolPrivate = true; + + PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding = + TTypeEncoding = dwarf::DW_EH_PE_absptr; + + EHFrameSection = 0; // Created on demand. + CompactUnwindSection = 0; // Used only by selected targets. + + Triple T(TT); + Triple::ArchType Arch = T.getArch(); + // FIXME: Checking for Arch here to filter out bogus triples such as + // cellspu-apple-darwin. Perhaps we should fix in Triple? 
+ if ((Arch == Triple::x86 || Arch == Triple::x86_64 || + Arch == Triple::arm || Arch == Triple::thumb || + Arch == Triple::ppc || Arch == Triple::ppc64 || + Arch == Triple::UnknownArch) && + (T.isOSDarwin() || T.getEnvironment() == Triple::MachO)) { + Env = IsMachO; + InitMachOMCObjectFileInfo(T); + } else if ((Arch == Triple::x86 || Arch == Triple::x86_64) && + (T.getOS() == Triple::MinGW32 || T.getOS() == Triple::Cygwin || + T.getOS() == Triple::Win32)) { + Env = IsCOFF; + InitCOFFMCObjectFileInfo(T); + } else { + Env = IsELF; + InitELFMCObjectFileInfo(T); + } +} + +void MCObjectFileInfo::InitEHFrameSection() { + if (Env == IsMachO) + EHFrameSection = + Ctx->getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + else if (Env == IsELF) + EHFrameSection = + Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + else + EHFrameSection = + Ctx->getCOFFSection(".eh_frame", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index 8635aac..a04ae08 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -17,10 +17,10 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/MC/MCAsmBackend.h" using namespace llvm; -MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, +MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_) : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, @@ -30,7 +30,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, { } -MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, +MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_, MCAssembler *_Assembler) : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp index 0c1f8f0..c76052d 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -24,6 +24,7 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { CurBuf = NULL; CurPtr = NULL; + isAtStartOfLine = true; } AsmLexer::~AsmLexer() { @@ -146,7 +147,7 @@ AsmToken AsmLexer::LexLineComment() { // FIXME: This is broken if we happen to a comment at the end of a file, which // was .included, and which doesn't end with a newline. 
int CurChar = getNextChar(); - while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); if (CurChar == EOF) @@ -334,6 +335,17 @@ StringRef AsmLexer::LexUntilEndOfStatement() { return StringRef(TokStart, CurPtr-TokStart); } +StringRef AsmLexer::LexUntilEndOfLine() { + TokStart = CurPtr; + + while (*CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + bool AsmLexer::isAtStartOfComment(char Char) { // FIXME: This won't work for multi-character comment indicators like "//". return Char == *MAI.getCommentString(); @@ -349,14 +361,29 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (isAtStartOfComment(CurChar)) + if (isAtStartOfComment(CurChar)) { + // If this comment starts with a '#', then return the Hash token and let + // the assembler parser see if it can be parsed as a cpp line filename + // comment. We do this only if we are at the start of a line. + if (CurChar == '#' && isAtStartOfLine) + return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + isAtStartOfLine = true; return LexLineComment(); + } if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } + // If we're missing a newline at EOF, make sure we still get an + // EndOfStatement token before the Eof token. + if (CurChar == EOF && !isAtStartOfLine) { + isAtStartOfLine = true; + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + } + + isAtStartOfLine = false; switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -373,6 +400,7 @@ AsmToken AsmLexer::LexToken() { return LexToken(); case '\n': // FALL THROUGH. 
case '\r': + isAtStartOfLine = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index 0c181f3..1648757 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -18,22 +18,22 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCParser/AsmCond.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetAsmParser.h" #include <cctype> #include <vector> using namespace llvm; @@ -87,6 +87,8 @@ private: MCStreamer &Out; const MCAsmInfo &MAI; SourceMgr &SrcMgr; + SourceMgr::DiagHandlerTy SavedDiagHandler; + void *SavedDiagContext; MCAsmParserExtension *GenericParser; MCAsmParserExtension *PlatformParser; @@ -115,8 +117,13 @@ private: /// Flag tracking whether any errors have been encountered. unsigned HadError : 1; + /// The values from the last parsed cpp hash file line comment if any. + StringRef CppHashFilename; + int64_t CppHashLineNumber; + SMLoc CppHashLoc; + public: - AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); ~AsmParser(); @@ -153,6 +160,8 @@ private: void CheckForValidSection(); bool ParseStatement(); + void EatToEndOfLine(); + bool ParseCppHashLineFilenameComment(const SMLoc &L); bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); bool expandMacro(SmallString<256> &Buf, StringRef Body, @@ -166,6 +175,7 @@ private: bool ShowLine = true) const { SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine); } + static void DiagHandler(const SMDiagnostic &Diag, void *Context); /// EnterIncludeFile - Enter the specified file. This returns true on failure. bool EnterIncludeFile(const std::string &Filename); @@ -338,11 +348,16 @@ extern MCAsmParserExtension *createCOFFAsmParser(); enum { DEFAULT_ADDRSPACE = 0 }; -AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, +AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), GenericParser(new GenericAsmParser), PlatformParser(0), - CurBuffer(0), MacrosEnabled(true) { + CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0) { + // Save the old handler. + SavedDiagHandler = SrcMgr.getDiagHandler(); + SavedDiagContext = SrcMgr.getDiagContext(); + // Set our own handler which calls the saved handler. + SrcMgr.setDiagHandler(DiagHandler, this); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); // Initialize the generic parser. @@ -740,9 +755,12 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, /// ParseExpression - Parse an expression and return it. 
/// -/// expr ::= expr +,- expr -> lowest. -/// expr ::= expr |,^,&,! expr -> middle. -/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= expr &&,|| expr -> lowest. +/// expr ::= expr |,^,&,! expr +/// expr ::= expr ==,!=,<>,<,<=,>,>= expr +/// expr ::= expr <<,>> expr +/// expr ::= expr +,- expr +/// expr ::= expr *,/,% expr -> highest. /// expr ::= primaryexpr /// bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { @@ -809,7 +827,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, default: return 0; // not a binop. - // Lowest Precedence: &&, ||, @ + // Lowest Precedence: &&, || case AsmToken::AmpAmp: Kind = MCBinaryExpr::LAnd; return 1; @@ -852,30 +870,32 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::GTE; return 3; + // Intermediate Precedence: <<, >> + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 4; + case AsmToken::GreaterGreater: + Kind = MCBinaryExpr::Shr; + return 4; + // High Intermediate Precedence: +, - case AsmToken::Plus: Kind = MCBinaryExpr::Add; - return 4; + return 5; case AsmToken::Minus: Kind = MCBinaryExpr::Sub; - return 4; + return 5; - // Highest Precedence: *, /, %, <<, >> + // Highest Precedence: *, /, % case AsmToken::Star: Kind = MCBinaryExpr::Mul; - return 5; + return 6; case AsmToken::Slash: Kind = MCBinaryExpr::Div; - return 5; + return 6; case AsmToken::Percent: Kind = MCBinaryExpr::Mod; - return 5; - case AsmToken::LessLess: - Kind = MCBinaryExpr::Shl; - return 5; - case AsmToken::GreaterGreater: - Kind = MCBinaryExpr::Shr; - return 5; + return 6; } } @@ -932,10 +952,8 @@ bool AsmParser::ParseStatement() { StringRef IDVal; int64_t LocalLabelVal = -1; // A full line comment is a '#' as the first token. - if (Lexer.is(AsmToken::Hash)) { - EatToEndOfStatement(); - return false; - } + if (Lexer.is(AsmToken::Hash)) + return ParseCppHashLineFilenameComment(IDLoc); // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { @@ -1117,15 +1135,8 @@ bool AsmParser::ParseStatement() { if (IDVal == ".globl" || IDVal == ".global") return ParseDirectiveSymbolAttribute(MCSA_Global); - // ELF only? Should it be here? - if (IDVal == ".local") - return ParseDirectiveSymbolAttribute(MCSA_Local); - if (IDVal == ".hidden") - return ParseDirectiveSymbolAttribute(MCSA_Hidden); if (IDVal == ".indirect_symbol") return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); - if (IDVal == ".internal") - return ParseDirectiveSymbolAttribute(MCSA_Internal); if (IDVal == ".lazy_reference") return ParseDirectiveSymbolAttribute(MCSA_LazyReference); if (IDVal == ".no_dead_strip") @@ -1134,12 +1145,8 @@ bool AsmParser::ParseStatement() { return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); if (IDVal == ".private_extern") return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); - if (IDVal == ".protected") - return ParseDirectiveSymbolAttribute(MCSA_Protected); if (IDVal == ".reference") return ParseDirectiveSymbolAttribute(MCSA_Reference); - if (IDVal == ".weak") - return ParseDirectiveSymbolAttribute(MCSA_Weak); if (IDVal == ".weak_definition") return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); if (IDVal == ".weak_reference") @@ -1157,7 +1164,7 @@ bool AsmParser::ParseStatement() { if (IDVal == ".include") return ParseDirectiveInclude(); - if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64") + if (IDVal == ".code16") return TokError(Twine(IDVal) + " not supported yet"); // Look up the handler in the handler table. 
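(A quick worked illustration of what the reordered precedence levels above change, using an operand that is not taken from this patch: for an expression such as 2 << 1 + 1, the old table placed << and >> on the highest level together with *, / and %, so the operand folded as (2 << 1) + 1 = 5; with the new table, + and - at level 5 bind tighter than << and >> at level 4, so the same operand folds as 2 << (1 + 1) = 8, i.e. C-style precedence.)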
@@ -1215,6 +1222,108 @@ bool AsmParser::ParseStatement() { return false; } +/// EatToEndOfLine uses the Lexer to eat the characters to the end of the line +/// since they may not be able to be tokenized to get to the end of line token. +void AsmParser::EatToEndOfLine() { + if (!Lexer.is(AsmToken::EndOfStatement)) + Lexer.LexUntilEndOfLine(); + // Eat EOL. + Lex(); +} + +/// ParseCppHashLineFilenameComment as this: +/// ::= # number "filename" +/// or just as a full line comment if it doesn't have a number and a string. +bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { + Lex(); // Eat the hash token. + + if (getLexer().isNot(AsmToken::Integer)) { + // Consume the line since in cases it is not a well-formed line directive, + // as if were simply a full line comment. + EatToEndOfLine(); + return false; + } + + int64_t LineNumber = getTok().getIntVal(); + Lex(); + + if (getLexer().isNot(AsmToken::String)) { + EatToEndOfLine(); + return false; + } + + StringRef Filename = getTok().getString(); + // Get rid of the enclosing quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Save the SMLoc, Filename and LineNumber for later use by diagnostics. + CppHashLoc = L; + CppHashFilename = Filename; + CppHashLineNumber = LineNumber; + + // Ignore any trailing characters, they're just comment. + EatToEndOfLine(); + return false; +} + +/// DiagHandler - will use the the last parsed cpp hash line filename comment +/// for the Filename and LineNo if any in the diagnostic. +void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { + const AsmParser *Parser = static_cast<const AsmParser*>(Context); + raw_ostream &OS = errs(); + + const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); + const SMLoc &DiagLoc = Diag.getLoc(); + int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); + int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); + + // Like SourceMgr::PrintMessage() we need to print the include stack if any + // before printing the message. + int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); + if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) { + SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer); + DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS); + } + + // If we have not parsed a cpp hash line filename comment or the source + // manager changed or buffer changed (like in a nested include) then just + // print the normal diagnostic using its Filename and LineNo. + if (!Parser->CppHashLineNumber || + &DiagSrcMgr != &Parser->SrcMgr || + DiagBuf != CppHashBuf) { + if (Parser->SavedDiagHandler) + Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext); + else + Diag.Print(0, OS); + return; + } + + // Use the CppHashFilename and calculate a line number based on the + // CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for + // the diagnostic. 
+ const std::string Filename = Parser->CppHashFilename; + + int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf); + int CppHashLocLineNo = + Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf); + int LineNo = Parser->CppHashLineNumber - 1 + + (DiagLocLineNo - CppHashLocLineNo); + + SMDiagnostic NewDiag(*Diag.getSourceMgr(), + Diag.getLoc(), + Filename, + LineNo, + Diag.getColumnNo(), + Diag.getMessage(), + Diag.getLineContents(), + Diag.getShowLine()); + + if (Parser->SavedDiagHandler) + Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext); + else + NewDiag.Print(0, OS); +} + bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, const std::vector<StringRef> &Parameters, const std::vector<std::vector<AsmToken> > &A, @@ -1923,12 +2032,17 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { StringRef Name; + SMLoc Loc = getTok().getLoc(); if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); + return Error(Loc, "expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + // Assembler local symbols don't make any sense here. Complain loudly. + if (Sym->isTemporary()) + return Error(Loc, "non-local symbol required in directive"); + getStreamer().EmitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) @@ -2416,7 +2530,7 @@ bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc)) return true; - Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true); + Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true); } else return getParser().ParseAbsoluteExpression(Register); @@ -2724,8 +2838,8 @@ bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { /// \brief Create an MCAsmParser instance. 
-MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM, +MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C, MCStreamer &Out, const MCAsmInfo &MAI) { - return new AsmParser(T, SM, C, Out, MAI); + return new AsmParser(SM, C, Out, MAI); } diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp index 66ad384..185b516 100644 --- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -8,15 +8,16 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/COFF.h" using namespace llvm; @@ -72,6 +73,7 @@ class COFFAsmParser : public MCAsmParserExtension { ".seh_pushframe"); AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>( ".seh_endprologue"); + AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); } bool ParseSectionDirectiveText(StringRef, SMLoc) { @@ -118,12 +120,44 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except); bool ParseSEHRegisterNumber(unsigned &RegNo); + bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc); public: COFFAsmParser() {} }; } // end annonomous namespace. +/// ParseDirectiveSymbolAttribute +/// ::= { ".weak", ... } [ identifier ( , identifier )* ] +bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { + MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive) + .Case(".weak", MCSA_Weak) + .Default(MCSA_Invalid); + assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + bool COFFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind) { @@ -401,12 +435,16 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { SMLoc startLoc = getLexer().getLoc(); if (getLexer().is(AsmToken::Percent)) { - const TargetAsmInfo &TAI = getContext().getTargetAsmInfo(); + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); SMLoc endLoc; unsigned LLVMRegNo; if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc)) return true; +#if 0 + // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering + // violation so this validation code is disabled. + // Check that this is a non-volatile register. 
const unsigned *NVRegs = TAI.getCalleeSavedRegs(); unsigned i; @@ -415,8 +453,9 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { break; if (NVRegs[i] == 0) return Error(startLoc, "expected non-volatile register"); +#endif - int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo); + int SEHRegNo = MRI.getSEHRegNum(LLVMRegNo); if (SEHRegNo < 0) return Error(startLoc,"register can't be represented in SEH unwind info"); RegNo = SEHRegNo; diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp index dcf689a..d891126 100644 --- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -47,12 +47,17 @@ public: AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); + AddDirectiveHandler< + &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); + AddDirectiveHandler< + &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); + AddDirectiveHandler< + &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); + AddDirectiveHandler< + &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection"); + AddDirectiveHandler< + &ELFAsmParser::ParseDirectivePushSection>(".pushsection"); AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); @@ -60,6 +65,14 @@ public: AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local"); + AddDirectiveHandler< + &ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected"); + AddDirectiveHandler< + &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal"); + AddDirectiveHandler< + &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); } // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is @@ -129,6 +142,7 @@ public: bool ParseDirectiveIdent(StringRef, SMLoc); bool ParseDirectiveSymver(StringRef, SMLoc); bool ParseDirectiveWeakref(StringRef, SMLoc); + bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); private: bool ParseSectionName(StringRef &SectionName); @@ -136,6 +150,41 @@ private: } +/// ParseDirectiveSymbolAttribute +/// ::= { ".local", ".weak", ... 
} [ identifier ( , identifier )* ] +bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { + MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive) + .Case(".weak", MCSA_Weak) + .Case(".local", MCSA_Local) + .Case(".hidden", MCSA_Hidden) + .Case(".internal", MCSA_Internal) + .Case(".protected", MCSA_Protected) + .Default(MCSA_Invalid); + assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind) { if (getLexer().isNot(AsmToken::EndOfStatement)) diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp index 4030e41..5239ec7 100644 --- a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -8,13 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/ADT/Twine.h" using namespace llvm; MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { @@ -23,7 +23,7 @@ MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { MCAsmParser::~MCAsmParser() { } -void MCAsmParser::setTargetParser(TargetAsmParser &P) { +void MCAsmParser::setTargetParser(MCTargetAsmParser &P) { assert(!TargetParser && "Target parser is already initialized!"); TargetParser = &P; TargetParser->Initialize(*this); diff --git a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp index 512f6b0..6fb1ba4 100644 --- a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -1,4 +1,4 @@ -//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==// +//===-- MCTargetAsmParser.cpp - Target Assembly Parser ---------------------==// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" using namespace llvm; -TargetAsmParser::TargetAsmParser() +MCTargetAsmParser::MCTargetAsmParser() : AvailableFeatures(0) { } -TargetAsmParser::~TargetAsmParser() { +MCTargetAsmParser::~MCTargetAsmParser() { } diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp index 6098e6b..086c922 100644 --- a/contrib/llvm/lib/MC/MCPureStreamer.cpp +++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp @@ -28,7 +28,7 @@ private: virtual void EmitInstToData(const MCInst &Inst); public: - MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB, + MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, 
MCCodeEmitter *Emitter) : MCObjectStreamer(Context, TAB, OS, Emitter) {} @@ -86,7 +86,8 @@ public: virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { report_fatal_error("unsupported directive in pure streamer"); } - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { report_fatal_error("unsupported directive in pure streamer"); } virtual void EmitFileDirective(StringRef Filename) { @@ -228,7 +229,7 @@ void MCPureStreamer::Finish() { this->MCObjectStreamer::Finish(); } -MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB, +MCStreamer *llvm::createPureStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *CE) { - return new MCPureStreamer(Context, TAB, OS, CE); + return new MCPureStreamer(Context, MAB, OS, CE); } diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp index 6e96b78..3afa22b 100644 --- a/contrib/llvm/lib/MC/MCStreamer.cpp +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -16,13 +16,17 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include <cstdlib> using namespace llvm; MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), - CurrentW64UnwindInfo(0) { + CurrentW64UnwindInfo(0), + LastSymbol(0), + UniqueCodeBeginSuffix(0), + UniqueDataBeginSuffix(0) { const MCSection *section = NULL; SectionStack.push_back(std::make_pair(section, section)); } @@ -171,10 +175,94 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection() && "Cannot emit before setting section!"); Symbol->setSection(*getCurrentSection()); + LastSymbol = Symbol; +} - StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix(); - if (!Symbol->getName().startswith(Prefix)) - LastNonPrivate = Symbol; +void MCStreamer::EmitDataRegion() { + if (RegionIndicator == Data) return; + + MCContext &Context = getContext(); + const MCAsmInfo &MAI = Context.getAsmInfo(); + if (!MAI.getSupportsDataRegions()) return; + + // Generate a unique symbol name. + MCSymbol *NewSym = Context.GetOrCreateSymbol( + Twine(MAI.getDataBeginLabelName()) + + utostr(UniqueDataBeginSuffix++)); + EmitLabel(NewSym); + + RegionIndicator = Data; +} + +void MCStreamer::EmitCodeRegion() { + if (RegionIndicator == Code) return; + + MCContext &Context = getContext(); + const MCAsmInfo &MAI = Context.getAsmInfo(); + if (!MAI.getSupportsDataRegions()) return; + + // Generate a unique symbol name. + MCSymbol *NewSym = Context.GetOrCreateSymbol( + Twine(MAI.getCodeBeginLabelName()) + + utostr(UniqueCodeBeginSuffix++)); + EmitLabel(NewSym); + + RegionIndicator = Code; +} + +void MCStreamer::EmitJumpTable8Region() { + if (RegionIndicator == JumpTable8) return; + + MCContext &Context = getContext(); + const MCAsmInfo &MAI = Context.getAsmInfo(); + if (!MAI.getSupportsDataRegions()) return; + + // Generate a unique symbol name. 
+ MCSymbol *NewSym = Context.GetOrCreateSymbol( + Twine(MAI.getJumpTable8BeginLabelName()) + + utostr(UniqueDataBeginSuffix++)); + EmitLabel(NewSym); + + RegionIndicator = JumpTable8; +} + +void MCStreamer::EmitJumpTable16Region() { + if (RegionIndicator == JumpTable16) return; + + MCContext &Context = getContext(); + const MCAsmInfo &MAI = Context.getAsmInfo(); + if (!MAI.getSupportsDataRegions()) return; + + // Generate a unique symbol name. + MCSymbol *NewSym = Context.GetOrCreateSymbol( + Twine(MAI.getJumpTable16BeginLabelName()) + + utostr(UniqueDataBeginSuffix++)); + EmitLabel(NewSym); + + RegionIndicator = JumpTable16; +} + + +void MCStreamer::EmitJumpTable32Region() { + if (RegionIndicator == JumpTable32) return; + + MCContext &Context = getContext(); + const MCAsmInfo &MAI = Context.getAsmInfo(); + if (!MAI.getSupportsDataRegions()) return; + + // Generate a unique symbol name. + MCSymbol *NewSym = Context.GetOrCreateSymbol( + Twine(MAI.getJumpTable32BeginLabelName()) + + utostr(UniqueDataBeginSuffix++)); + EmitLabel(NewSym); + + RegionIndicator = JumpTable32; +} + +void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->CompactUnwindEncoding = CompactUnwindEncoding; } void MCStreamer::EmitCFISections(bool EH, bool Debug) { @@ -187,11 +275,22 @@ void MCStreamer::EmitCFIStartProc() { MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); if (CurFrame && !CurFrame->End) report_fatal_error("Starting a frame before finishing the previous one!"); + MCDwarfFrameInfo Frame; - Frame.Begin = getContext().CreateTempSymbol(); - Frame.Function = LastNonPrivate; - EmitLabel(Frame.Begin); + Frame.Function = LastSymbol; + + // If the function is externally visible, we need to create a local + // symbol to avoid relocations. 
+ StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix(); + if (LastSymbol && LastSymbol->getName().startswith(Prefix)) { + Frame.Begin = LastSymbol; + } else { + Frame.Begin = getContext().CreateTempSymbol(); + EmitLabel(Frame.Begin); + } + FrameInfos.push_back(Frame); + RegionIndicator = Code; } void MCStreamer::EmitCFIEndProc() { diff --git a/contrib/llvm/lib/Target/TargetAsmLexer.cpp b/contrib/llvm/lib/MC/MCTargetAsmLexer.cpp index d4893ff..c01c914 100644 --- a/contrib/llvm/lib/Target/TargetAsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCTargetAsmLexer.cpp @@ -1,4 +1,4 @@ -//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===// +//===-- llvm/MC/MCTargetAsmLexer.cpp - Target Assembly Lexer --------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/MC/MCTargetAsmLexer.h" using namespace llvm; -TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {} -TargetAsmLexer::~TargetAsmLexer() {} +MCTargetAsmLexer::MCTargetAsmLexer(const Target &T) + : TheTarget(T), Lexer(NULL) { +} +MCTargetAsmLexer::~MCTargetAsmLexer() {} diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp index e698384..79e66fc 100644 --- a/contrib/llvm/lib/MC/MCWin64EH.cpp +++ b/contrib/llvm/lib/MC/MCWin64EH.cpp @@ -10,10 +10,11 @@ #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/ADT/Twine.h" namespace llvm { @@ -220,14 +221,36 @@ StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) { return ""; } +static const MCSection *getWin64EHTableSection(StringRef suffix, + MCContext &context) { + if (suffix == "") + return context.getObjectFileInfo()->getXDataSection(); + + return context.getCOFFSection((".xdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + +static const MCSection *getWin64EHFuncTableSection(StringRef suffix, + MCContext &context) { + if (suffix == "") + return context.getObjectFileInfo()->getPDataSection(); + return context.getCOFFSection((".pdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) { // Switch sections (the static function above is meant to be called from // here and from Emit(). MCContext &context = streamer.getContext(); - const TargetAsmInfo &TAI = context.getTargetAsmInfo(); const MCSection *xdataSect = - TAI.getWin64EHTableSection(GetSectionSuffix(info->Function)); + getWin64EHTableSection(GetSectionSuffix(info->Function), context); streamer.SwitchSection(xdataSect); llvm::EmitUnwindInfo(streamer, info); @@ -236,11 +259,10 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { MCContext &context = streamer.getContext(); // Emit the unwind info structs first. 
- const TargetAsmInfo &TAI = context.getTargetAsmInfo(); for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); const MCSection *xdataSect = - TAI.getWin64EHTableSection(GetSectionSuffix(info.Function)); + getWin64EHTableSection(GetSectionSuffix(info.Function), context); streamer.SwitchSection(xdataSect); llvm::EmitUnwindInfo(streamer, &info); } @@ -248,7 +270,7 @@ void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); const MCSection *pdataSect = - TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function)); + getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context); streamer.SwitchSection(pdataSect); EmitRuntimeFunction(streamer, &info); } diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp index 69efe23..a9219ad 100644 --- a/contrib/llvm/lib/MC/MachObjectWriter.cpp +++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetAsmBackend.h" #include <vector> using namespace llvm; @@ -291,7 +291,7 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, const MCSymbol &Symbol = Data.getSymbol(); uint8_t Type = 0; uint16_t Flags = Data.getFlags(); - uint32_t Address = 0; + uint64_t Address = 0; // Set the N_TYPE bits. See <mach-o/nlist.h>. // @@ -590,14 +590,28 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, return false; return true; } + // For Darwin x86_64, there is one special case when the reference IsPCRel. + // If the fragment with the reference does not have a base symbol but meets + // the simple way of dealing with this, in that it is a temporary symbol in + // the same atom then it is assumed to be fully resolved. This is needed so + // a relocation entry is not created and so the static linker does not + // mess up the reference later. + else if(!FB.getAtom() && + SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ + return true; + } } else { if (!TargetObjectWriter->useAggressiveSymbolFolding()) return false; } - const MCFragment &FA = *Asm.getSymbolData(SA).getFragment(); + const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); + + // Bail if the symbol has no fragment. 
+ if (!FA) + return false; - A_Base = FA.getAtom(); + A_Base = FA->getAtom(); if (!A_Base) return false; @@ -613,7 +627,8 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, return false; } -void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { +void MachObjectWriter::WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { unsigned NumSections = Asm.size(); // The section data starts after the header, the segment load command (and diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index 101237a..b15e225 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -33,7 +33,7 @@ #include "llvm/Support/TimeValue.h" -#include "../Target/X86/X86FixupKinds.h" +#include "../Target/X86/MCTargetDesc/X86FixupKinds.h" #include <cstdio> diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp index 6c36c12..7409daf 100644 --- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp +++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp @@ -24,13 +24,13 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCWin64EH.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -40,7 +40,7 @@ public: MCSymbol const *CurSymbol; WinCOFFStreamer(MCContext &Context, - TargetAsmBackend &TAB, + MCAsmBackend &MAB, MCCodeEmitter &CE, raw_ostream &OS); @@ -63,7 +63,8 @@ public: virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size,unsigned ByteAlignment); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, @@ -123,10 +124,10 @@ private: } // end anonymous namespace. WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, - TargetAsmBackend &TAB, + MCAsmBackend &MAB, MCCodeEmitter &CE, raw_ostream &OS) - : MCObjectStreamer(Context, TAB, OS, &CE) + : MCObjectStreamer(Context, MAB, OS, &CE) , CurSymbol(NULL) { } @@ -304,11 +305,12 @@ void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, AddCommonSymbol(Symbol, Size, ByteAlignment, true); } -void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { +void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { assert((Symbol->isInSection() ? 
Symbol->getSection().getVariant() == MCSection::SV_COFF : true) && "Got non COFF section in the COFF backend!"); - AddCommonSymbol(Symbol, Size, 1, false); + AddCommonSymbol(Symbol, Size, ByteAlignment, false); } void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, @@ -395,11 +397,11 @@ void WinCOFFStreamer::Finish() { namespace llvm { MCStreamer *createWinCOFFStreamer(MCContext &Context, - TargetAsmBackend &TAB, + MCAsmBackend &MAB, MCCodeEmitter &CE, raw_ostream &OS, bool RelaxAll) { - WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS); + WinCOFFStreamer *S = new WinCOFFStreamer(Context, MAB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); return S; } diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp new file mode 100644 index 0000000..e2eaff5 --- /dev/null +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -0,0 +1,172 @@ +//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ArchiveObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Archive.h" +#include "llvm/ADT/APInt.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +namespace { +const StringRef Magic = "!<arch>\n"; + +struct ArchiveMemberHeader { + char Name[16]; + char LastModified[12]; + char UID[6]; + char GID[6]; + char AccessMode[8]; + char Size[10]; //< Size of data, not including header or padding. + char Terminator[2]; + + ///! Get the name without looking up long names. + StringRef getName() const { + char EndCond = Name[0] == '/' ? ' ' : '/'; + StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond); + if (end == StringRef::npos) + end = sizeof(Name); + assert(end <= sizeof(Name) && end > 0); + // Don't include the EndCond if there is one. + return StringRef(Name, end); + } + + uint64_t getSize() const { + APInt ret; + StringRef(Size, sizeof(Size)).getAsInteger(10, ret); + return ret.getZExtValue(); + } +}; + +const ArchiveMemberHeader *ToHeader(const char *base) { + return reinterpret_cast<const ArchiveMemberHeader *>(base); +} +} + +Archive::Child Archive::Child::getNext() const { + size_t SpaceToSkip = sizeof(ArchiveMemberHeader) + + ToHeader(Data.data())->getSize(); + // If it's odd, add 1 to make it even. + if (SpaceToSkip & 1) + ++SpaceToSkip; + + const char *NextLoc = Data.data() + SpaceToSkip; + + // Check to see if this is past the end of the archive. + if (NextLoc >= Parent->Data->getBufferEnd()) + return Child(Parent, StringRef(0, 0)); + + size_t NextSize = sizeof(ArchiveMemberHeader) + + ToHeader(NextLoc)->getSize(); + + return Child(Parent, StringRef(NextLoc, NextSize)); +} + +error_code Archive::Child::getName(StringRef &Result) const { + StringRef name = ToHeader(Data.data())->getName(); + // Check if it's a special name. + if (name[0] == '/') { + if (name.size() == 1) { // Linker member. + Result = name; + return object_error::success; + } + if (name.size() == 2 && name[1] == '/') { // String table. + Result = name; + return object_error::success; + } + // It's a long name. + // Get the offset. 
+ APInt offset; + name.substr(1).getAsInteger(10, offset); + const char *addr = Parent->StringTable->Data.begin() + + sizeof(ArchiveMemberHeader) + + offset.getZExtValue(); + // Verify it. + if (Parent->StringTable == Parent->end_children() + || addr < (Parent->StringTable->Data.begin() + + sizeof(ArchiveMemberHeader)) + || addr > (Parent->StringTable->Data.begin() + + sizeof(ArchiveMemberHeader) + + Parent->StringTable->getSize())) + return object_error::parse_failed; + Result = addr; + return object_error::success; + } + // It's a simple name. + if (name[name.size() - 1] == '/') + Result = name.substr(0, name.size() - 1); + else + Result = name; + return object_error::success; +} + +uint64_t Archive::Child::getSize() const { + return ToHeader(Data.data())->getSize(); +} + +MemoryBuffer *Archive::Child::getBuffer() const { + StringRef name; + if (getName(name)) return NULL; + return MemoryBuffer::getMemBuffer(Data.substr(sizeof(ArchiveMemberHeader), + getSize()), + name, + false); +} + +error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const { + OwningPtr<Binary> ret; + if (error_code ec = + createBinary(getBuffer(), ret)) + return ec; + Result.swap(ret); + return object_error::success; +} + +Archive::Archive(MemoryBuffer *source, error_code &ec) + : Binary(Binary::isArchive, source) + , StringTable(Child(this, StringRef(0, 0))) { + // Check for sufficient magic. + if (!source || source->getBufferSize() + < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive. + || StringRef(source->getBufferStart(), 8) != Magic) { + ec = object_error::invalid_file_type; + return; + } + + // Get the string table. It's the 3rd member. + child_iterator StrTable = begin_children(); + child_iterator e = end_children(); + for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {} + + // Check to see if there were 3 members, or the 3rd member wasn't named "//". + StringRef name; + if (StrTable != e && !StrTable->getName(name) && name == "//") + StringTable = StrTable; + + ec = object_error::success; +} + +Archive::child_iterator Archive::begin_children() const { + const char *Loc = Data->getBufferStart() + Magic.size(); + size_t Size = sizeof(ArchiveMemberHeader) + + ToHeader(Loc)->getSize(); + return Child(this, StringRef(Loc, Size)); +} + +Archive::child_iterator Archive::end_children() const { + return Child(this, StringRef(0, 0)); +} + +namespace llvm { + +} // end namespace llvm diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp index 4b31c75..4e528d8 100644 --- a/contrib/llvm/lib/Object/Binary.cpp +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -17,8 +17,9 @@ #include "llvm/Support/Path.h" // Include headers for createBinary. 
-#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ObjectFile.h" using namespace llvm; using namespace object; @@ -50,6 +51,12 @@ error_code object::createBinary(MemoryBuffer *Source, static_cast<unsigned>(Source->getBufferSize())); error_code ec; switch (type) { + case sys::Archive_FileType: { + OwningPtr<Binary> ret(new Archive(scopedSource.take(), ec)); + if (ec) return ec; + Result.swap(ret); + return object_error::success; + } case sys::ELF_Relocatable_FileType: case sys::ELF_Executable_FileType: case sys::ELF_SharedObject_FileType: @@ -90,7 +97,7 @@ error_code object::createBinary(MemoryBuffer *Source, error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) { OwningPtr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFile(Path, File)) + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Path, File)) return ec; return createBinary(File.take(), Result); } diff --git a/contrib/llvm/lib/Object/CMakeLists.txt b/contrib/llvm/lib/Object/CMakeLists.txt deleted file mode 100644 index 6a6814f..0000000 --- a/contrib/llvm/lib/Object/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMObject - MachOObject.cpp - ObjectFile.cpp - COFFObjectFile.cpp - ELFObjectFile.cpp - ) diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 07de6bc..750c34d 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/COFF.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -114,7 +115,7 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb, return object_error::success; } -error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, +error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb, uint64_t &Result) const { const coff_symbol *symb = toSymb(Symb); const coff_section *Section = NULL; @@ -132,6 +133,55 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, return object_error::success; } +error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { + const coff_symbol *symb = toSymb(Symb); + const coff_section *Section = NULL; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; + if (Type == 'U' || Type == 'w') + Result = UnknownAddressOrSize; + else if (Section) + Result = reinterpret_cast<uintptr_t>(base() + + Section->PointerToRawData + + symb->Value); + else + Result = reinterpret_cast<uintptr_t>(base() + symb->Value); + return object_error::success; +} + +error_code COFFObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::SymbolType &Result) const { + const coff_symbol *symb = toSymb(Symb); + Result = SymbolRef::ST_Other; + if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL && + symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) { + Result = SymbolRef::ST_External; + } else { + if (symb->Type.ComplexType == COFF::IMAGE_SYM_DTYPE_FUNCTION) { + Result = SymbolRef::ST_Function; + } else { + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; + if (Type == 'r' || Type == 'R') { + Result = SymbolRef::ST_Data; + } + } + } + return object_error::success; +} + +error_code COFFObjectFile::isSymbolGlobal(DataRefImpl Symb, + bool &Result) const { 
+ const coff_symbol *symb = toSymb(Symb); + Result = (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL); + return object_error::success; +} + error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb, uint64_t &Result) const { // FIXME: Return the correct size. This requires looking at all the symbols @@ -286,6 +336,15 @@ error_code COFFObjectFile::getSectionContents(DataRefImpl Sec, return object_error::success; } +error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec, + uint64_t &Res) const { + const coff_section *sec = toSec(Sec); + if (!sec) + return object_error::parse_failed; + Res = uint64_t(1) << (((sec->Characteristics & 0x00F00000) >> 20) - 1); + return object_error::success; +} + error_code COFFObjectFile::isSectionText(DataRefImpl Sec, bool &Result) const { const coff_section *sec = toSec(Sec); @@ -293,14 +352,61 @@ error_code COFFObjectFile::isSectionText(DataRefImpl Sec, return object_error::success; } +error_code COFFObjectFile::isSectionData(DataRefImpl Sec, + bool &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + return object_error::success; +} + +error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec, + bool &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + return object_error::success; +} + error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const { - // FIXME: Unimplemented. - Result = false; + const coff_section *sec = toSec(Sec); + const coff_symbol *symb = toSymb(Symb); + const coff_section *symb_sec; + if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec; + if (symb_sec == sec) + Result = true; + else + Result = false; return object_error::success; } +relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { + const coff_section *sec = toSec(Sec); + DataRefImpl ret; + std::memset(&ret, 0, sizeof(ret)); + if (sec->NumberOfRelocations == 0) + ret.p = 0; + else + ret.p = reinterpret_cast<uintptr_t>(base() + sec->PointerToRelocations); + + return relocation_iterator(RelocationRef(ret, this)); +} + +relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { + const coff_section *sec = toSec(Sec); + DataRefImpl ret; + std::memset(&ret, 0, sizeof(ret)); + if (sec->NumberOfRelocations == 0) + ret.p = 0; + else + ret.p = reinterpret_cast<uintptr_t>( + reinterpret_cast<const coff_relocation*>( + base() + sec->PointerToRelocations) + + sec->NumberOfRelocations); + + return relocation_iterator(RelocationRef(ret, this)); +} + COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) : ObjectFile(Binary::isCOFF, Object, ec) { // Check that we at least have enough room for a header. 
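(Worked example for the new COFFObjectFile::getSectionAlignment above, with numbers that are illustrative rather than taken from the patch: COFF encodes section alignment in bits 20-23 of Characteristics as a value n meaning 2^(n-1) bytes, so a section carrying IMAGE_SCN_ALIGN_16BYTES (0x00500000) gives n = 5 and the routine returns uint64_t(1) << (5 - 1) = 16.)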
@@ -327,7 +433,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart); if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header))) return; - + SectionTable = reinterpret_cast<const coff_section *>( base() + HeaderStart @@ -360,18 +466,18 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) ec = object_error::parse_failed; return; } - + ec = object_error::success; } -ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { +symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } -ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { +symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. DataRefImpl ret; std::memset(&ret, 0, sizeof(DataRefImpl)); @@ -379,14 +485,14 @@ ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { return symbol_iterator(SymbolRef(ret, this)); } -ObjectFile::section_iterator COFFObjectFile::begin_sections() const { +section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable); return section_iterator(SectionRef(ret, this)); } -ObjectFile::section_iterator COFFObjectFile::end_sections() const { +section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); @@ -445,6 +551,121 @@ error_code COFFObjectFile::getString(uint32_t offset, return object_error::success; } +error_code COFFObjectFile::getSymbol(uint32_t index, + const coff_symbol *&Result) const { + if (index > 0 && index < Header->NumberOfSymbols) + Result = SymbolTable + index; + else + return object_error::parse_failed; + return object_error::success; +} + +const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { + return reinterpret_cast<const coff_relocation*>(Rel.p); +} +error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel, + RelocationRef &Res) const { + Rel.p = reinterpret_cast<uintptr_t>( + reinterpret_cast<const coff_relocation*>(Rel.p) + 1); + Res = RelocationRef(Rel, this); + return object_error::success; +} +error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const { + Res = toRel(Rel)->VirtualAddress; + return object_error::success; +} +error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Res) const { + const coff_relocation* R = toRel(Rel); + DataRefImpl Symb; + Symb.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex); + Res = SymbolRef(Symb, this); + return object_error::success; +} +error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, + uint32_t &Res) const { + const coff_relocation* R = toRel(Rel); + Res = R->Type; + return object_error::success; +} + +#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \ + case COFF::enum: res = #enum; break; + +error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + const coff_relocation *reloc = toRel(Rel); + StringRef res; + switch (Header->Machine) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + switch (reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE); + 
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32); + default: + res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_I386: + switch (reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32); + default: + res = "Unknown"; + } + break; + default: + res = "Unknown"; + } + Result.append(res.begin(), res.end()); + return object_error::success; +} + +#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME + +error_code COFFObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Res) const { + Res = 0; + return object_error::success; +} +error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + const coff_relocation *reloc = toRel(Rel); + const coff_symbol *symb = 0; + if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec; + DataRefImpl sym; + ::memset(&sym, 0, sizeof(sym)); + sym.p = reinterpret_cast<uintptr_t>(symb); + StringRef symname; + if (error_code ec = getSymbolName(sym, symname)) return ec; + Result.append(symname.begin(), symname.end()); + return object_error::success; +} + namespace llvm { ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) { diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp index e2ff4df..257d08c 100644 --- a/contrib/llvm/lib/Object/ELFObjectFile.cpp +++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp @@ -14,11 +14,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> #include <limits> #include <utility> @@ -176,12 +179,89 @@ struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> { } namespace { +template<support::endianness target_endianness, bool is64Bits, bool isRela> +struct Elf_Rel_Base; + 
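For readers unfamiliar with the stringizing trick behind LLVM_COFF_SWITCH_RELOC_TYPE_NAME, each invocation expands to a case label plus a string literal produced by the preprocessor's # operator, for example:

    // LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32) expands to:
    case COFF::IMAGE_REL_AMD64_ADDR32: res = "IMAGE_REL_AMD64_ADDR32"; break;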
+template<support::endianness target_endianness> +struct Elf_Rel_Base<target_endianness, false, false> { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Word r_info; // Symbol table index and type of relocation to apply +}; + +template<support::endianness target_endianness> +struct Elf_Rel_Base<target_endianness, true, false> { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Xword r_info; // Symbol table index and type of relocation to apply +}; + +template<support::endianness target_endianness> +struct Elf_Rel_Base<target_endianness, false, true> { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Word r_info; // Symbol table index and type of relocation to apply + Elf_Sword r_addend; // Compute value for relocatable field by adding this +}; + +template<support::endianness target_endianness> +struct Elf_Rel_Base<target_endianness, true, true> { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Xword r_info; // Symbol table index and type of relocation to apply + Elf_Sxword r_addend; // Compute value for relocatable field by adding this. +}; + +template<support::endianness target_endianness, bool is64Bits, bool isRela> +struct Elf_Rel_Impl; + +template<support::endianness target_endianness, bool isRela> +struct Elf_Rel_Impl<target_endianness, true, isRela> + : Elf_Rel_Base<target_endianness, true, isRela> { + using Elf_Rel_Base<target_endianness, true, isRela>::r_info; + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + uint64_t getSymbol() const { return (r_info >> 32); } + unsigned char getType() const { + return (unsigned char) (r_info & 0xffffffffL); + } + void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(uint64_t s, unsigned char t) { + r_info = (s << 32) + (t&0xffffffffL); + } +}; + +template<support::endianness target_endianness, bool isRela> +struct Elf_Rel_Impl<target_endianness, false, isRela> + : Elf_Rel_Base<target_endianness, false, isRela> { + using Elf_Rel_Base<target_endianness, false, isRela>::r_info; + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + + // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, + // and ELF32_R_INFO macros defined in the ELF specification: + uint32_t getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); } + void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(uint32_t s, unsigned char t) { + r_info = (s << 8) + t; + } +}; + +} + +namespace { template<support::endianness target_endianness, bool is64Bits> class ELFObjectFile : public ObjectFile { LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr; typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym; + typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel; + typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela; struct Elf_Ehdr { unsigned char 
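The getSymbol/getType/setSymbolAndType accessors in the Elf_Rel_Impl specializations above follow the standard ELF32_R_* and ELF64_R_* packing of r_info; a minimal sketch of the two layouts:

    // 32-bit ELF: symbol index in the upper 24 bits, relocation type in the low 8.
    //   r_info = (sym << 8) + type;    sym = r_info >> 8;    type = r_info & 0xff;
    // 64-bit ELF: a 32/32 split.
    //   r_info = (sym << 32) + (type & 0xffffffffL);
    //   sym    = r_info >> 32;         type = r_info & 0xffffffff;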
e_ident[ELF::EI_NIDENT]; // ELF Identification bytes @@ -206,37 +286,81 @@ class ELFObjectFile : public ObjectFile { unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } }; - typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t; + typedef SmallVector<const Elf_Shdr*, 1> Sections_t; + typedef DenseMap<unsigned, unsigned> IndexMap_t; + typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t; const Elf_Ehdr *Header; const Elf_Shdr *SectionHeaderTable; const Elf_Shdr *dot_shstrtab_sec; // Section header string table. const Elf_Shdr *dot_strtab_sec; // Symbol header string table. - SymbolTableSections_t SymbolTableSections; + Sections_t SymbolTableSections; + IndexMap_t SymbolTableSectionsIndexMap; + DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable; + + /// @brief Map sections to an array of relocation sections that reference + /// them sorted by section index. + RelocMap_t SectionRelocMap; + + /// @brief Get the relocation section that contains \a Rel. + const Elf_Shdr *getRelSection(DataRefImpl Rel) const { + return getSection(Rel.w.b); + } void validateSymbol(DataRefImpl Symb) const; + bool isRelocationHasAddend(DataRefImpl Rel) const; + template<typename T> + const T *getEntry(uint16_t Section, uint32_t Entry) const; + template<typename T> + const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const; const Elf_Sym *getSymbol(DataRefImpl Symb) const; const Elf_Shdr *getSection(DataRefImpl index) const; - const Elf_Shdr *getSection(uint16_t index) const; - const char *getString(uint16_t section, uint32_t offset) const; + const Elf_Shdr *getSection(uint32_t index) const; + const Elf_Rel *getRel(DataRefImpl Rel) const; + const Elf_Rela *getRela(DataRefImpl Rela) const; + const char *getString(uint32_t section, uint32_t offset) const; const char *getString(const Elf_Shdr *section, uint32_t offset) const; + error_code getSymbolName(const Elf_Sym *Symb, StringRef &Res) const; protected: virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; + virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const; + virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const; virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const; virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const; virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const; + virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const; + virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) 
const; + + virtual error_code getRelocationNext(DataRefImpl Rel, + RelocationRef &Res) const; + virtual error_code getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const; + virtual error_code getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Res) const; + virtual error_code getRelocationType(DataRefImpl Rel, + uint32_t &Res) const; + virtual error_code getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const; + virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Res) const; + virtual error_code getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const; public: ELFObjectFile(MemoryBuffer *Object, error_code &ec); @@ -248,6 +372,11 @@ public: virtual uint8_t getBytesInAddress() const; virtual StringRef getFileFormatName() const; virtual unsigned getArch() const; + + uint64_t getNumSections() const; + uint64_t getStringTableIndex() const; + ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; + const Elf_Shdr *getSection(const Elf_Sym *symb) const; }; } // end namespace @@ -299,29 +428,37 @@ error_code ELFObjectFile<target_endianness, is64Bits> ::getSymbolName(DataRefImpl Symb, StringRef &Result) const { validateSymbol(Symb); - const Elf_Sym *symb = getSymbol(Symb); - if (symb->st_name == 0) { - const Elf_Shdr *section = getSection(symb->st_shndx); - if (!section) - Result = ""; - else - Result = getString(dot_shstrtab_sec, section->sh_name); - return object_error::success; - } + const Elf_Sym *symb = getSymbol(Symb); + return getSymbolName(symb, Result); +} - // Use the default symbol table name section. - Result = getString(dot_strtab_sec, symb->st_name); - return object_error::success; +template<support::endianness target_endianness, bool is64Bits> +ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits> + ::getSymbolTableIndex(const Elf_Sym *symb) const { + if (symb->st_shndx == ELF::SHN_XINDEX) + return ExtendedSymbolTable.lookup(symb); + return symb->st_shndx; +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * +ELFObjectFile<target_endianness, is64Bits> + ::getSection(const Elf_Sym *symb) const { + if (symb->st_shndx == ELF::SHN_XINDEX) + return getSection(ExtendedSymbolTable.lookup(symb)); + if (symb->st_shndx >= ELF::SHN_LORESERVE) + return 0; + return getSection(symb->st_shndx); } template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> - ::getSymbolAddress(DataRefImpl Symb, - uint64_t &Result) const { + ::getSymbolOffset(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; - switch (symb->st_shndx) { + switch (getSymbolTableIndex(symb)) { case ELF::SHN_COMMON: // Undefined symbols have no address yet. 
case ELF::SHN_UNDEF: @@ -330,7 +467,7 @@ error_code ELFObjectFile<target_endianness, is64Bits> case ELF::SHN_ABS: Result = symb->st_value; return object_error::success; - default: Section = getSection(symb->st_shndx); + default: Section = getSection(symb); } switch (symb->getType()) { @@ -350,6 +487,43 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section; + switch (getSymbolTableIndex(symb)) { + case ELF::SHN_COMMON: // Fall through. + // Undefined symbols have no address yet. + case ELF::SHN_UNDEF: + Result = UnknownAddressOrSize; + return object_error::success; + case ELF::SHN_ABS: + Result = reinterpret_cast<uintptr_t>(base()+symb->st_value); + return object_error::success; + default: Section = getSection(symb); + } + const uint8_t* addr = base(); + if (Section) + addr += Section->sh_offset; + switch (symb->getType()) { + case ELF::STT_SECTION: + Result = reinterpret_cast<uintptr_t>(addr); + return object_error::success; + case ELF::STT_FUNC: // Fall through. + case ELF::STT_OBJECT: // Fall through. + case ELF::STT_NOTYPE: + addr += symb->st_value; + Result = reinterpret_cast<uintptr_t>(addr); + return object_error::success; + default: + Result = UnknownAddressOrSize; + return object_error::success; + } +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::getSymbolSize(DataRefImpl Symb, uint64_t &Result) const { validateSymbol(Symb); @@ -366,7 +540,7 @@ error_code ELFObjectFile<target_endianness, is64Bits> char &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); - const Elf_Shdr *Section = getSection(symb->st_shndx); + const Elf_Shdr *Section = getSection(symb); char ret = '?'; @@ -389,7 +563,7 @@ error_code ELFObjectFile<target_endianness, is64Bits> } } - switch (symb->st_shndx) { + switch (getSymbolTableIndex(symb)) { case ELF::SHN_UNDEF: if (ret == '?') ret = 'U'; @@ -401,7 +575,7 @@ error_code ELFObjectFile<target_endianness, is64Bits> switch (symb->getBinding()) { case ELF::STB_GLOBAL: ret = ::toupper(ret); break; case ELF::STB_WEAK: - if (symb->st_shndx == ELF::SHN_UNDEF) + if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) ret = 'w'; else if (symb->getType() == ELF::STT_OBJECT) @@ -416,7 +590,8 @@ error_code ELFObjectFile<target_endianness, is64Bits> return ec; Result = StringSwitch<char>(name) .StartsWith(".debug", 'N') - .StartsWith(".note", 'n'); + .StartsWith(".note", 'n') + .Default('?'); return object_error::success; } @@ -426,6 +601,43 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolType(DataRefImpl Symb, + SymbolRef::SymbolType &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + + if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) { + Result = SymbolRef::ST_External; + return object_error::success; + } + + switch (symb->getType()) { + case ELF::STT_FUNC: + Result = SymbolRef::ST_Function; + break; + case ELF::STT_OBJECT: + Result = SymbolRef::ST_Data; + break; + default: + Result = SymbolRef::ST_Other; + break; + } + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code 
ELFObjectFile<target_endianness, is64Bits> + ::isSymbolGlobal(DataRefImpl Symb, + bool &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + + Result = symb->getBinding() == ELF::STB_GLOBAL; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::isSymbolInternal(DataRefImpl Symb, bool &Result) const { validateSymbol(Symb); @@ -487,6 +699,15 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionAlignment(DataRefImpl Sec, + uint64_t &Result) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); + Result = sec->sh_addralign; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::isSectionText(DataRefImpl Sec, bool &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); @@ -499,6 +720,32 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::isSectionData(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); + if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) + && sec->sh_type == ELF::SHT_PROGBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::isSectionBSS(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); + if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) + && sec->sh_type == ELF::SHT_NOBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const { @@ -508,6 +755,330 @@ error_code ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> +relocation_iterator ELFObjectFile<target_endianness, is64Bits> + ::getSectionRelBegin(DataRefImpl Sec) const { + DataRefImpl RelData; + memset(&RelData, 0, sizeof(RelData)); + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); + typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); + if (sec != 0 && ittr != SectionRelocMap.end()) { + RelData.w.a = getSection(ittr->second[0])->sh_info; + RelData.w.b = ittr->second[0]; + RelData.w.c = 0; + } + return relocation_iterator(RelocationRef(RelData, this)); +} + +template<support::endianness target_endianness, bool is64Bits> +relocation_iterator ELFObjectFile<target_endianness, is64Bits> + ::getSectionRelEnd(DataRefImpl Sec) const { + DataRefImpl RelData; + memset(&RelData, 0, sizeof(RelData)); + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); + typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); + if (sec != 0 && ittr != SectionRelocMap.end()) { + // Get the index of the last relocation section for this section. 
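On the isSectionData/isSectionBSS predicates added above: they key off sh_type (SHT_PROGBITS for initialized data, SHT_NOBITS for zero-fill) combined with a mask test that passes if either SHF_ALLOC or SHF_WRITE is present, so read-only allocated sections such as .rodata also satisfy the data predicate. Typical header values, for reference:

    // .data    sh_type = SHT_PROGBITS   sh_flags = SHF_ALLOC | SHF_WRITE   -> isSectionData
    // .rodata  sh_type = SHT_PROGBITS   sh_flags = SHF_ALLOC               -> isSectionData (mask is an any-of test)
    // .bss     sh_type = SHT_NOBITS     sh_flags = SHF_ALLOC | SHF_WRITE   -> isSectionBSS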
+ std::size_t relocsecindex = ittr->second[ittr->second.size() - 1]; + const Elf_Shdr *relocsec = getSection(relocsecindex); + RelData.w.a = relocsec->sh_info; + RelData.w.b = relocsecindex; + RelData.w.c = relocsec->sh_size / relocsec->sh_entsize; + } + return relocation_iterator(RelocationRef(RelData, this)); +} + +// Relocations +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationNext(DataRefImpl Rel, + RelocationRef &Result) const { + ++Rel.w.c; + const Elf_Shdr *relocsec = getSection(Rel.w.b); + if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) { + // We have reached the end of the relocations for this section. See if there + // is another relocation section. + typename RelocMap_t::mapped_type relocseclist = + SectionRelocMap.lookup(getSection(Rel.w.a)); + + // Do a binary search for the current reloc section index (which must be + // present). Then get the next one. + typename RelocMap_t::mapped_type::const_iterator loc = + std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b); + ++loc; + + // If there is no next one, don't do anything. The ++Rel.w.c above sets Rel + // to the end iterator. + if (loc != relocseclist.end()) { + Rel.w.b = *loc; + Rel.w.a = 0; + } + } + Result = RelocationRef(Rel, this); + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Result) const { + uint32_t symbolIdx; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + symbolIdx = getRel(Rel)->getSymbol(); + break; + } + case ELF::SHT_RELA : { + symbolIdx = getRela(Rel)->getSymbol(); + break; + } + } + DataRefImpl SymbolData; + IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link); + if (it == SymbolTableSectionsIndexMap.end()) + report_fatal_error("Relocation symbol table not found!"); + SymbolData.d.a = symbolIdx; + SymbolData.d.b = it->second; + Result = SymbolRef(SymbolData, this); + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationAddress(DataRefImpl Rel, + uint64_t &Result) const { + uint64_t offset; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + offset = getRel(Rel)->r_offset; + break; + } + case ELF::SHT_RELA : { + offset = getRela(Rel)->r_offset; + break; + } + } + + Result = offset; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationType(DataRefImpl Rel, + uint32_t &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + Result = getRel(Rel)->getType(); + break; + } + case ELF::SHT_RELA : { + Result = getRela(Rel)->getType(); + break; + } + } + return object_error::success; +} + +#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \ + case ELF::enum: res = #enum; break; + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationTypeName(DataRefImpl Rel, + 
SmallVectorImpl<char> &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + uint8_t type; + StringRef res; + switch (sec->sh_type) { + default : + return object_error::parse_failed; + case ELF::SHT_REL : { + type = getRel(Rel)->getType(); + break; + } + case ELF::SHT_RELA : { + type = getRela(Rel)->getType(); + break; + } + } + switch (Header->e_machine) { + case ELF::EM_X86_64: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC); + default: + res = "Unknown"; + } + break; + case ELF::EM_386: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE); + default: + res = "Unknown"; + } + break; + default: + res = "Unknown"; + } + Result.append(res.begin(), res.end()); + return object_error::success; +} + +#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + Result = 0; + return object_error::success; + } + case ELF::SHT_RELA : { + Result = getRela(Rel)->r_addend; + return object_error::success; + } + } +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + uint8_t type; + StringRef res; + int64_t addend = 0; + uint16_t symbol_index = 0; + switch (sec->sh_type) { + default : + return object_error::parse_failed; + case ELF::SHT_REL : { + type = getRel(Rel)->getType(); + symbol_index = getRel(Rel)->getSymbol(); + // TODO: Read implicit addend from section data. + break; + } + case ELF::SHT_RELA : { + type = getRela(Rel)->getType(); + symbol_index = getRela(Rel)->getSymbol(); + addend = getRela(Rel)->r_addend; + break; + } + } + const Elf_Sym *symb = getEntry<Elf_Sym>(sec->sh_link, symbol_index); + StringRef symname; + if (error_code ec = getSymbolName(symb, symname)) + return ec; + switch (Header->e_machine) { + case ELF::EM_X86_64: + switch (type) { + case ELF::R_X86_64_32S: + res = symname; + break; + case ELF::R_X86_64_PC32: { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + fmt << symname << (addend < 0 ? "" : "+") << addend << "-P"; + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); + } + break; + default: + res = "Unknown"; + } + break; + default: + res = "Unknown"; + } + if (Result.empty()) + Result.append(res.begin(), res.end()); + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object , error_code &ec) : ObjectFile(Binary::isELF, Object, ec) @@ -521,25 +1092,41 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object SectionHeaderTable = reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff); - uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize; + uint64_t SectionTableSize = getNumSections() * Header->e_shentsize; if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize <= base() + Data->getBufferSize())) // FIXME: Proper error handling. report_fatal_error("Section table goes past end of file!"); - // To find the symbol tables we walk the section table to find SHT_STMTAB. 
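To make the getRelocationValueString formatting above concrete: for an SHT_RELA entry against a symbol hypothetically named foo, the strings produced for the handled x86-64 cases look like:

    // R_X86_64_PC32, addend  4  ->  "foo+4-P"
    // R_X86_64_PC32, addend -8  ->  "foo-8-P"
    // R_X86_64_32S              ->  "foo"       (symbol name only)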
- for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable), - *e = i + Header->e_shnum * Header->e_shentsize; - i != e; i += Header->e_shentsize) { - const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i); + // To find the symbol tables we walk the section table to find SHT_SYMTAB. + const Elf_Shdr* SymbolTableSectionHeaderIndex = 0; + const Elf_Shdr* sh = reinterpret_cast<const Elf_Shdr*>(SectionHeaderTable); + for (uint64_t i = 0, e = getNumSections(); i != e; ++i) { + if (sh->sh_type == ELF::SHT_SYMTAB_SHNDX) { + if (SymbolTableSectionHeaderIndex) + // FIXME: Proper error handling. + report_fatal_error("More than one .symtab_shndx!"); + SymbolTableSectionHeaderIndex = sh; + } if (sh->sh_type == ELF::SHT_SYMTAB) { + SymbolTableSectionsIndexMap[i] = SymbolTableSections.size(); SymbolTableSections.push_back(sh); } + if (sh->sh_type == ELF::SHT_REL || sh->sh_type == ELF::SHT_RELA) { + SectionRelocMap[getSection(sh->sh_info)].push_back(i); + } + ++sh; + } + + // Sort section relocation lists by index. + for (typename RelocMap_t::iterator i = SectionRelocMap.begin(), + e = SectionRelocMap.end(); i != e; ++i) { + std::sort(i->second.begin(), i->second.end()); } // Get string table sections. - dot_shstrtab_sec = getSection(Header->e_shstrndx); + dot_shstrtab_sec = getSection(getStringTableIndex()); if (dot_shstrtab_sec) { // Verify that the last byte in the string table in a null. if (((const char*)base() + dot_shstrtab_sec->sh_offset) @@ -550,7 +1137,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object // Merge this into the above loop. for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable), - *e = i + Header->e_shnum * Header->e_shentsize; + *e = i + getNumSections() * Header->e_shentsize; i != e; i += Header->e_shentsize) { const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i); if (sh->sh_type == ELF::SHT_STRTAB) { @@ -567,11 +1154,26 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object } } } + + // Build symbol name side-mapping if there is one. 
+ if (SymbolTableSectionHeaderIndex) { + const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() + + SymbolTableSectionHeaderIndex->sh_offset); + error_code ec; + for (symbol_iterator si = begin_symbols(), + se = end_symbols(); si != se; si.increment(ec)) { + if (ec) + report_fatal_error("Fewer extended symbol table entries than symbols!"); + if (*ShndxTable != ELF::SHN_UNDEF) + ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable; + ++ShndxTable; + } + } } template<support::endianness target_endianness, bool is64Bits> -ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> - ::begin_symbols() const { +symbol_iterator ELFObjectFile<target_endianness, is64Bits> + ::begin_symbols() const { DataRefImpl SymbolData; memset(&SymbolData, 0, sizeof(SymbolData)); if (SymbolTableSections.size() == 0) { @@ -585,8 +1187,8 @@ ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> -ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> - ::end_symbols() const { +symbol_iterator ELFObjectFile<target_endianness, is64Bits> + ::end_symbols() const { DataRefImpl SymbolData; memset(&SymbolData, 0, sizeof(SymbolData)); SymbolData.d.a = std::numeric_limits<uint32_t>::max(); @@ -595,8 +1197,8 @@ ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> -ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> - ::begin_sections() const { +section_iterator ELFObjectFile<target_endianness, is64Bits> + ::begin_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff); @@ -604,13 +1206,13 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> -ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> - ::end_sections() const { +section_iterator ELFObjectFile<target_endianness, is64Bits> + ::end_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff - + (Header->e_shentsize * Header->e_shnum)); + + (Header->e_shentsize*getNumSections())); return section_iterator(SectionRef(ret, this)); } @@ -629,6 +1231,8 @@ StringRef ELFObjectFile<target_endianness, is64Bits> return "ELF32-i386"; case ELF::EM_X86_64: return "ELF32-x86-64"; + case ELF::EM_ARM: + return "ELF32-arm"; default: return "ELF32-unknown"; } @@ -654,26 +1258,75 @@ unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const { return Triple::x86; case ELF::EM_X86_64: return Triple::x86_64; + case ELF::EM_ARM: + return Triple::arm; default: return Triple::UnknownArch; } } template<support::endianness target_endianness, bool is64Bits> +uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const { + if (Header->e_shnum == ELF::SHN_UNDEF) + return SectionHeaderTable->sh_size; + return Header->e_shnum; +} + +template<support::endianness target_endianness, bool is64Bits> +uint64_t +ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const { + if (Header->e_shnum == ELF::SHN_UNDEF) { + if (Header->e_shstrndx == ELF::SHN_HIRESERVE) + return SectionHeaderTable->sh_link; + if (Header->e_shstrndx >= getNumSections()) + return 0; + } + return Header->e_shstrndx; +} + + +template<support::endianness target_endianness, bool is64Bits> +template<typename 
T> +inline const T * +ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section, + uint32_t Entry) const { + return getEntry<T>(getSection(Section), Entry); +} + +template<support::endianness target_endianness, bool is64Bits> +template<typename T> +inline const T * +ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section, + uint32_t Entry) const { + return reinterpret_cast<const T *>( + base() + + Section->sh_offset + + (Entry * Section->sh_entsize)); +} + +template<support::endianness target_endianness, bool is64Bits> const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym * ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const { - const Elf_Shdr *sec = SymbolTableSections[Symb.d.b]; - return reinterpret_cast<const Elf_Sym *>( - base() - + sec->sh_offset - + (Symb.d.a * sec->sh_entsize)); + return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a); +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel * +ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const { + return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c); +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela * +ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const { + return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c); } template<support::endianness target_endianness, bool is64Bits> const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const { const Elf_Shdr *sec = getSection(Symb.d.b); - if (sec->sh_type != ELF::SHT_SYMTAB) + if (sec->sh_type != ELF::SHT_SYMTAB || sec->sh_type != ELF::SHT_DYNSYM) // FIXME: Proper error handling. report_fatal_error("Invalid symbol table section!"); return sec; @@ -681,10 +1334,10 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const { template<support::endianness target_endianness, bool is64Bits> const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * -ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const { - if (index == 0 || index >= ELF::SHN_LORESERVE) +ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const { + if (index == 0) return 0; - if (!SectionHeaderTable || index >= Header->e_shnum) + if (!SectionHeaderTable || index >= getNumSections()) // FIXME: Proper error handling. report_fatal_error("Invalid section index!"); @@ -695,7 +1348,7 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const { template<support::endianness target_endianness, bool is64Bits> const char *ELFObjectFile<target_endianness, is64Bits> - ::getString(uint16_t section, + ::getString(uint32_t section, ELF::Elf32_Word offset) const { return getString(getSection(section), offset); } @@ -711,6 +1364,24 @@ const char *ELFObjectFile<target_endianness, is64Bits> return (const char *)base() + section->sh_offset + offset; } +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolName(const Elf_Sym *symb, + StringRef &Result) const { + if (symb->st_name == 0) { + const Elf_Shdr *section = getSection(symb); + if (!section) + Result = ""; + else + Result = getString(dot_shstrtab_sec, section->sh_name); + return object_error::success; + } + + // Use the default symbol table name section. 
+ Result = getString(dot_strtab_sec, symb->st_name); + return object_error::success; +} + // EI_CLASS, EI_DATA. static std::pair<unsigned char, unsigned char> getElfArchType(MemoryBuffer *Object) { diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp index 9890feb..9cdac86 100644 --- a/contrib/llvm/lib/Object/MachOObject.cpp +++ b/contrib/llvm/lib/Object/MachOObject.cpp @@ -9,6 +9,7 @@ #include "llvm/Object/MachOObject.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" @@ -244,6 +245,18 @@ void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI, } template<> +void SwapStruct(macho::LinkeditDataLoadCommand &Value) { + SwapValue(Value.Type); + SwapValue(Value.Size); + SwapValue(Value.DataOffset); + SwapValue(Value.DataSize); +} +void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI, + InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const { + ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); +} + +template<> void SwapStruct(macho::IndirectSymbolTableEntry &Value) { SwapValue(Value.Index); } @@ -343,6 +356,31 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } + +void MachOObject::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + const char *ptr = Buffer->getBufferStart() + Index; + uint64_t data = 0; + uint64_t delta = 0; + uint32_t shift = 0; + while (true) { + assert(ptr < Buffer->getBufferEnd() && "index out of bounds"); + assert(shift < 64 && "too big for uint64_t"); + + uint8_t byte = *ptr++; + delta |= ((byte & 0x7F) << shift); + shift += 7; + if (byte < 0x80) { + if (delta == 0) + break; + data += delta; + Out.push_back(data); + delta = 0; + shift = 0; + } + } +} + /* ** */ // Object Dumping Facilities void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; } diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 26a6e13..507df58 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -13,11 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" +#include "llvm/Object/MachO.h" #include "llvm/Object/MachOFormat.h" -#include "llvm/Object/MachOObject.h" -#include "llvm/Object/ObjectFile.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MachO.h" #include <cctype> #include <cstring> @@ -27,56 +25,24 @@ using namespace llvm; using namespace object; namespace llvm { +namespace object { -typedef MachOObject::LoadCommandInfo LoadCommandInfo; - -class MachOObjectFile : public ObjectFile { -public: - MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec) +MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, + error_code &ec) : ObjectFile(Binary::isMachO, Object, ec), MachOObj(MOO), - RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {} - - virtual symbol_iterator begin_symbols() const; - virtual symbol_iterator end_symbols() const; - virtual section_iterator begin_sections() const; - virtual section_iterator end_sections() const; - - virtual uint8_t getBytesInAddress() const; - virtual StringRef getFileFormatName() const; - virtual unsigned getArch() const; - -protected: - virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef 
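A note on ReadULEB128s above: each value is a little-endian base-128 varint (seven payload bits per byte, high bit set on continuation bytes), and the loop treats successive values as deltas, pushing running sums into Out until a zero delta terminates the list. With a small hypothetical byte sequence:

    // input bytes: 0x10, 0x90 0x01, 0x00
    //   0x10       -> delta 16                  -> Out = {16}
    //   0x90 0x01  -> delta 16 + (1 << 7) = 144 -> Out = {16, 160}
    //   0x00       -> delta 0                   -> loop terminates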
&Res) const; - virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; - virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; - virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; - virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; - virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; - - virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; - virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; - virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; - virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; - virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; - virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; - virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S, - bool &Result) const; - -private: - MachOObject *MachOObj; - mutable uint32_t RegisteredStringTable; - - void moveToNextSection(DataRefImpl &DRI) const; - void getSymbolTableEntry(DataRefImpl DRI, - InMemoryStruct<macho::SymbolTableEntry> &Res) const; - void getSymbol64TableEntry(DataRefImpl DRI, - InMemoryStruct<macho::Symbol64TableEntry> &Res) const; - void moveToNextSymbol(DataRefImpl &DRI) const; - void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const; - void getSection64(DataRefImpl DRI, - InMemoryStruct<macho::Section64> &Res) const; -}; + RegisteredStringTable(std::numeric_limits<uint32_t>::max()) { + DataRefImpl DRI; + DRI.d.a = DRI.d.b = 0; + moveToNextSection(DRI); + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + while (DRI.d.a < LoadCommandCount) { + Sections.push_back(DRI); + DRI.d.b++; + moveToNextSection(DRI); + } +} + ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { error_code ec; @@ -158,6 +124,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, return object_error::success; } +error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI, + uint64_t &Result) const { + uint64_t SectionOffset; + uint8_t SectionIndex; + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + Result = Entry->Value; + SectionIndex = Entry->SectionIndex; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + Result = Entry->Value; + SectionIndex = Entry->SectionIndex; + } + getSectionAddress(Sections[SectionIndex-1], SectionOffset); + Result -= SectionOffset; + + return object_error::success; +} + error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, uint64_t &Result) const { if (MachOObj->is64Bit()) { @@ -227,7 +214,51 @@ error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI, return object_error::success; } -ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { +error_code MachOObjectFile::isSymbolGlobal(DataRefImpl Symb, bool &Res) const { + + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(Symb, Entry); + Res = Entry->Type & MachO::NlistMaskExternal; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(Symb, Entry); + Res = Entry->Type & MachO::NlistMaskExternal; + } + return object_error::success; +} + +error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::SymbolType &Res) const { + uint8_t n_type; + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; 
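For the MachO::NlistMask* constants used in the isSymbolGlobal/getSymbolType hunks here, the nlist n_type byte is itself a small bit-field; a sketch of its layout, per the Mach-O nlist definition:

    // n_type bit layout:
    //   0x01  N_EXT   external bit (NlistMaskExternal)
    //   0x0e  N_TYPE  type mask (NlistMaskType); N_UNDF = 0x0, N_SECT = 0xe
    //   0x10  N_PEXT  private external
    //   0xe0  N_STAB  debugging entry mask (NlistMaskStab)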
+ getSymbol64TableEntry(Symb, Entry); + n_type = Entry->Type; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(Symb, Entry); + n_type = Entry->Type; + } + Res = SymbolRef::ST_Other; + + // If this is a STAB debugging symbol, we can do nothing more. + if (n_type & MachO::NlistMaskStab) + return object_error::success; + + switch (n_type & MachO::NlistMaskType) { + case MachO::NListTypeUndefined : + Res = SymbolRef::ST_External; + break; + case MachO::NListTypeSection : + Res = SymbolRef::ST_Function; + break; + } + return object_error::success; +} + + +symbol_iterator MachOObjectFile::begin_symbols() const { // DRI.d.a = segment number; DRI.d.b = symbol index. DataRefImpl DRI; DRI.d.a = DRI.d.b = 0; @@ -235,7 +266,7 @@ ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { return symbol_iterator(SymbolRef(DRI, this)); } -ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const { +symbol_iterator MachOObjectFile::end_symbols() const { DataRefImpl DRI; DRI.d.a = MachOObj->getHeader().NumLoadCommands; DRI.d.b = 0; @@ -283,6 +314,13 @@ MachOObjectFile::getSection(DataRefImpl DRI, MachOObj->ReadSection(LCI, DRI.d.b, Res); } +std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + SectionList::const_iterator loc = + std::find(Sections.begin(), Sections.end(), Sec); + assert(loc != Sections.end() && "Sec is not a valid section!"); + return std::distance(Sections.begin(), loc); +} + void MachOObjectFile::getSection64(DataRefImpl DRI, InMemoryStruct<macho::Section64> &Res) const { @@ -371,6 +409,20 @@ error_code MachOObjectFile::getSectionContents(DataRefImpl DRI, return object_error::success; } +error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI, + uint64_t &Result) const { + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = uint64_t(1) << Sect->Align; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = uint64_t(1) << Sect->Align; + } + return object_error::success; +} + error_code MachOObjectFile::isSectionText(DataRefImpl DRI, bool &Result) const { if (is64BitLoadCommand(MachOObj, DRI)) { @@ -385,35 +437,185 @@ error_code MachOObjectFile::isSectionText(DataRefImpl DRI, return object_error::success; } +error_code MachOObjectFile::isSectionData(DataRefImpl DRI, + bool &Result) const { + // FIXME: Unimplemented. + Result = false; + return object_error::success; +} + +error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, + bool &Result) const { + // FIXME: Unimplemented. 
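On the Mach-O getSectionAlignment hunk above: the section header's Align field stores log2 of the alignment, so the shift recovers a byte count, for example:

    // Sect->Align == 0  ->  uint64_t(1) << 0  ==  1-byte alignment
    // Sect->Align == 4  ->  uint64_t(1) << 4  ==  16-byte alignment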
+ Result = false; + return object_error::success; +} + error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const { + SymbolRef::SymbolType ST; + getSymbolType(Symb, ST); + if (ST == SymbolRef::ST_External) { + Result = false; + return object_error::success; + } + + uint64_t SectBegin, SectEnd; + getSectionAddress(Sec, SectBegin); + getSectionSize(Sec, SectEnd); + SectEnd += SectBegin; + if (MachOObj->is64Bit()) { InMemoryStruct<macho::Symbol64TableEntry> Entry; getSymbol64TableEntry(Symb, Entry); - Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b; + uint64_t SymAddr= Entry->Value; + Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); } else { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(Symb, Entry); - Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b; + uint64_t SymAddr= Entry->Value; + Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); } + return object_error::success; } -ObjectFile::section_iterator MachOObjectFile::begin_sections() const { +relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { + DataRefImpl ret; + ret.d.a = 0; + ret.d.b = getSectionIndex(Sec); + return relocation_iterator(RelocationRef(ret, this)); +} +relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { + uint32_t last_reloc; + if (is64BitLoadCommand(MachOObj, Sec)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(Sec, Sect); + last_reloc = Sect->NumRelocationTableEntries; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(Sec, Sect); + last_reloc = Sect->NumRelocationTableEntries; + } + DataRefImpl ret; + ret.d.a = last_reloc; + ret.d.b = getSectionIndex(Sec); + return relocation_iterator(RelocationRef(ret, this)); +} + +section_iterator MachOObjectFile::begin_sections() const { DataRefImpl DRI; DRI.d.a = DRI.d.b = 0; moveToNextSection(DRI); return section_iterator(SectionRef(DRI, this)); } -ObjectFile::section_iterator MachOObjectFile::end_sections() const { +section_iterator MachOObjectFile::end_sections() const { DataRefImpl DRI; DRI.d.a = MachOObj->getHeader().NumLoadCommands; DRI.d.b = 0; return section_iterator(SectionRef(DRI, this)); } +/*===-- Relocations -------------------------------------------------------===*/ + +void MachOObjectFile:: +getRelocation(DataRefImpl Rel, + InMemoryStruct<macho::RelocationEntry> &Res) const { + uint32_t relOffset; + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Section64> Sect; + getSection64(Sections[Rel.d.b], Sect); + relOffset = Sect->RelocationTableOffset; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(Sections[Rel.d.b], Sect); + relOffset = Sect->RelocationTableOffset; + } + MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res); +} +error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel, + RelocationRef &Res) const { + ++Rel.d.a; + Res = RelocationRef(Rel, this); + return object_error::success; +} +error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const { + const uint8_t* sectAddress = base(); + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Section64> Sect; + getSection64(Sections[Rel.d.b], Sect); + sectAddress += Sect->Offset; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(Sections[Rel.d.b], Sect); + sectAddress += Sect->Offset; + } + InMemoryStruct<macho::RelocationEntry> RE; + getRelocation(Rel, RE); + Res = reinterpret_cast<uintptr_t>(sectAddress + RE->Word0); + return object_error::success; +} +error_code 
MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Res) const { + InMemoryStruct<macho::RelocationEntry> RE; + getRelocation(Rel, RE); + uint32_t SymbolIdx = RE->Word1 & 0xffffff; + bool isExtern = (RE->Word1 >> 27) & 1; + + DataRefImpl Sym; + Sym.d.a = Sym.d.b = 0; + moveToNextSymbol(Sym); + if (isExtern) { + for (unsigned i = 0; i < SymbolIdx; i++) { + Sym.d.b++; + moveToNextSymbol(Sym); + assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands && + "Relocation symbol index out of range!"); + } + } + Res = SymbolRef(Sym, this); + return object_error::success; +} +error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, + uint32_t &Res) const { + InMemoryStruct<macho::RelocationEntry> RE; + getRelocation(Rel, RE); + Res = RE->Word1; + return object_error::success; +} +error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + return object_error::success; +} +error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Res) const { + InMemoryStruct<macho::RelocationEntry> RE; + getRelocation(Rel, RE); + bool isExtern = (RE->Word1 >> 27) & 1; + Res = 0; + if (!isExtern) { + const uint8_t* sectAddress = base(); + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Section64> Sect; + getSection64(Sections[Rel.d.b], Sect); + sectAddress += Sect->Offset; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(Sections[Rel.d.b], Sect); + sectAddress += Sect->Offset; + } + Res = reinterpret_cast<uintptr_t>(sectAddress); + } + return object_error::success; +} +error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + return object_error::success; +} + /*===-- Miscellaneous -----------------------------------------------------===*/ uint8_t MachOObjectFile::getBytesInAddress() const { @@ -465,5 +667,5 @@ unsigned MachOObjectFile::getArch() const { } } +} // end namespace object } // end namespace llvm - diff --git a/contrib/llvm/lib/Object/Makefile b/contrib/llvm/lib/Object/Makefile deleted file mode 100644 index 79388dc..0000000 --- a/contrib/llvm/lib/Object/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-LIBRARYNAME = LLVMObject -BUILD_ARCHIVE := 1 - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp index 9a373ad..2ea8db9 100644 --- a/contrib/llvm/lib/Object/Object.cpp +++ b/contrib/llvm/lib/Object/Object.cpp @@ -27,8 +27,8 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { } LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) { - ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections(); - return wrap(new ObjectFile::section_iterator(SI)); + section_iterator SI = unwrap(ObjectFile)->begin_sections(); + return wrap(new section_iterator(SI)); } void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index a7798df..69d8ed0 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -45,6 +45,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { case sys::Mach_O_DynamicLinker_FileType: case sys::Mach_O_Bundle_FileType: case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case sys::Mach_O_DSYMCompanion_FileType: return createMachOObjectFile(Object); case sys::COFF_FileType: return createCOFFObjectFile(Object); diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index c64da6e..f238894 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -832,6 +832,7 @@ APFloat::incrementSignificand() /* Our callers should never cause us to overflow. */ assert(carry == 0); + (void)carry; } /* Add the significand of the RHS. Returns the carry flag. */ @@ -926,6 +927,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) APFloat extendedAddend(*addend); status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); assert(status == opOK); + (void)status; lost_fraction = addOrSubtractSignificand(extendedAddend, false); /* Restore our state. */ @@ -1190,7 +1192,7 @@ APFloat::normalize(roundingMode rounding_mode, if (omsb) { /* OMSB is numbered from 1. We want to place it in the integer - bit numbered PRECISON if possible, with a compensating change in + bit numbered PRECISION if possible, with a compensating change in the exponent. */ exponentChange = omsb - semantics->precision; @@ -1389,6 +1391,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* The code above is intended to ensure that no borrow is necessary. */ assert(!carry); + (void)carry; } else { if (bits > 0) { APFloat temp_rhs(rhs); @@ -1402,6 +1405,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* We have a guard bit; generating a carry cannot happen. */ assert(!carry); + (void)carry; } return lost_fraction; @@ -2098,7 +2102,7 @@ APFloat::convertToInteger(APSInt &result, opStatus status = convertToInteger( parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); // Keeps the original signed-ness. - result = APInt(bitWidth, (unsigned)parts.size(), parts.data()); + result = APInt(bitWidth, parts); return status; } @@ -2121,7 +2125,7 @@ APFloat::convertFromUnsignedParts(const integerPart *src, dstCount = partCount(); precision = semantics->precision; - /* We want the most significant PRECISON bits of SRC. There may not + /* We want the most significant PRECISION bits of SRC. There may not be that many; extract what we can. 
*/ if (precision <= omsb) { exponent = omsb - 1; @@ -2192,7 +2196,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, roundingMode rounding_mode) { unsigned int partCount = partCountForBits(width); - APInt api = APInt(width, partCount, parts); + APInt api = APInt(width, makeArrayRef(parts, partCount)); sign = false; if (isSigned && APInt::tcExtractBit(parts, width - 1)) { @@ -2746,7 +2750,7 @@ APFloat::convertF80LongDoubleAPFloatToAPInt() const words[0] = mysignificand; words[1] = ((uint64_t)(sign & 1) << 15) | (myexponent & 0x7fffLL); - return APInt(80, 2, words); + return APInt(80, words); } APInt @@ -2791,7 +2795,7 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const words[1] = ((uint64_t)(sign2 & 1) << 63) | ((myexponent2 & 0x7ff) << 52) | (mysignificand2 & 0xfffffffffffffLL); - return APInt(128, 2, words); + return APInt(128, words); } APInt @@ -2827,7 +2831,7 @@ APFloat::convertQuadrupleAPFloatToAPInt() const ((myexponent & 0x7fff) << 48) | (mysignificand2 & 0xffffffffffffLL); - return APInt(128, 2, words); + return APInt(128, words); } APInt @@ -3239,8 +3243,9 @@ APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { significand[i] = ~((integerPart) 0); // ...and then clear the top bits for internal consistency. - significand[N-1] &= - (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1; + if (Sem.precision % integerPartWidth != 0) + significand[N-1] &= + (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1; return Val; } @@ -3270,7 +3275,7 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { Val.exponent = Sem.minExponent; Val.zeroSignificand(); Val.significandParts()[partCountForBits(Sem.precision)-1] |= - (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)); + (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); return Val; } @@ -3413,8 +3418,8 @@ void APFloat::toString(SmallVectorImpl<char> &Str, // Decompose the number into an APInt and an exponent. int exp = exponent - ((int) semantics->precision - 1); APInt significand(semantics->precision, - partCountForBits(semantics->precision), - significandParts()); + makeArrayRef(significandParts(), + partCountForBits(semantics->precision))); // Set FormatPrecision if zero. We want to do this before we // truncate trailing zeros, as those are part of the precision. @@ -3451,7 +3456,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str, // <= semantics->precision + e * 137 / 59 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) - unsigned precision = semantics->precision + 137 * texp / 59; + unsigned precision = semantics->precision + (137 * texp + 136) / 59; // Multiply significand by 5^e. 
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index 76265d4..3774c52 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -48,18 +48,20 @@ inline static uint64_t* getMemory(unsigned numWords) { inline static unsigned getDigit(char cdigit, uint8_t radix) { unsigned r; - if (radix == 16) { + if (radix == 16 || radix == 36) { r = cdigit - '0'; if (r <= 9) return r; r = cdigit - 'A'; - if (r <= 5) + if (r <= radix - 11U) return r + 10; r = cdigit - 'a'; - if (r <= 5) + if (r <= radix - 11U) return r + 10; + + radix = 10; } r = cdigit - '0'; @@ -83,25 +85,33 @@ void APInt::initSlowCase(const APInt& that) { memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); } - -APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) - : BitWidth(numBits), VAL(0) { +void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { assert(BitWidth && "Bitwidth too small"); - assert(bigVal && "Null pointer detected!"); + assert(bigVal.data() && "Null pointer detected!"); if (isSingleWord()) VAL = bigVal[0]; else { // Get memory, cleared to 0 pVal = getClearedMemory(getNumWords()); // Calculate the number of words to copy - unsigned words = std::min<unsigned>(numWords, getNumWords()); + unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); // Copy the words from bigVal to pVal - memcpy(pVal, bigVal, words * APINT_WORD_SIZE); + memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); } // Make sure unused high bits are cleared clearUnusedBits(); } +APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) + : BitWidth(numBits), VAL(0) { + initFromArray(bigVal); +} + +APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) + : BitWidth(numBits), VAL(0) { + initFromArray(makeArrayRef(bigVal, numWords)); +} + APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) : BitWidth(numbits), VAL(0) { assert(BitWidth && "Bitwidth too small"); @@ -376,6 +386,7 @@ APInt& APInt::operator*=(const APInt& RHS) { clearAllBits(); unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords; memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); + clearUnusedBits(); // delete dest array and return delete[] dest; @@ -461,7 +472,7 @@ APInt APInt::operator*(const APInt& RHS) const { return APInt(BitWidth, VAL * RHS.VAL); APInt Result(*this); Result *= RHS; - return Result.clearUnusedBits(); + return Result; } APInt APInt::operator+(const APInt& RHS) const { @@ -613,8 +624,9 @@ void APInt::flipBit(unsigned bitPosition) { unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { assert(!str.empty() && "Invalid string length"); - assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && - "Radix should be 2, 8, 10, or 16!"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || + radix == 36) && + "Radix should be 2, 8, 10, 16, or 36!"); size_t slen = str.size(); @@ -636,6 +648,8 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { if (radix == 16) return slen * 4 + isNegative; + // FIXME: base 36 + // This is grossly inefficient but accurate. We could probably do something // with a computation of roughly slen*64/20 and then adjust by the value of // the first few digits. But, I'm not sure how accurate that could be. @@ -644,7 +658,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { // be too large. This avoids the assertion in the constructor. 
This // calculation doesn't work appropriately for the numbers 0-9, so just use 4 // bits in that case. - unsigned sufficient = slen == 1 ? 4 : slen * 64/18; + unsigned sufficient + = radix == 10? (slen == 1 ? 4 : slen * 64/18) + : (slen == 1 ? 7 : slen * 16/3); // Convert to the actual binary value. APInt tmp(sufficient, StringRef(p, slen), radix); @@ -2107,8 +2123,9 @@ APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { // Check our assumptions here assert(!str.empty() && "Invalid string length"); - assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && - "Radix should be 2, 8, 10, or 16!"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || + radix == 36) && + "Radix should be 2, 8, 10, 16, or 36!"); StringRef::iterator p = str.begin(); size_t slen = str.size(); @@ -2165,7 +2182,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, bool formatAsCLiteral) const { - assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && + assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || + Radix == 36) && "Radix should be 2, 8, 10, or 16!"); const char *Prefix = ""; @@ -2195,7 +2213,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, return; } - static const char Digits[] = "0123456789ABCDEF"; + static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; if (isSingleWord()) { char Buffer[65]; @@ -2249,7 +2267,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, // For the 2, 8 and 16 bit cases, we can just shift instead of divide // because the number of bits per digit (1, 3 and 4 respectively) divides // equaly. We just shift until the value is zero. - if (Radix != 10) { + if (Radix == 2 || Radix == 8 || Radix == 16) { // Just shift tmp right for each digit width until it becomes zero unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); unsigned MaskAmt = Radix - 1; @@ -2260,7 +2278,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, Tmp = Tmp.lshr(ShiftAmt); } } else { - APInt divisor(4, 10); + APInt divisor(Radix == 10? 
4 : 8, Radix); while (Tmp != 0) { APInt APdigit(1, 0); APInt tmp2(Tmp.getBitWidth(), 0); diff --git a/contrib/llvm/lib/Support/Atomic.cpp b/contrib/llvm/lib/Support/Atomic.cpp index 8214521..94760cc 100644 --- a/contrib/llvm/lib/Support/Atomic.cpp +++ b/contrib/llvm/lib/Support/Atomic.cpp @@ -22,7 +22,7 @@ using namespace llvm; #endif void sys::MemoryFence() { -#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0 +#if LLVM_HAS_ATOMICS == 0 return; #else # if defined(__GNUC__) @@ -38,7 +38,7 @@ void sys::MemoryFence() { sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, sys::cas_flag new_value, sys::cas_flag old_value) { -#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0 +#if LLVM_HAS_ATOMICS == 0 sys::cas_flag result = *ptr; if (result == old_value) *ptr = new_value; @@ -53,7 +53,7 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, } sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { -#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0 +#if LLVM_HAS_ATOMICS == 0 ++(*ptr); return *ptr; #elif defined(__GNUC__) @@ -66,7 +66,7 @@ sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { } sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { -#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0 +#if LLVM_HAS_ATOMICS == 0 --(*ptr); return *ptr; #elif defined(__GNUC__) @@ -79,7 +79,7 @@ sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { } sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) { -#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0 +#if LLVM_HAS_ATOMICS == 0 *ptr += val; return *ptr; #elif defined(__GNUC__) diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp new file mode 100644 index 0000000..a63bf83 --- /dev/null +++ b/contrib/llvm/lib/Support/BlockFrequency.cpp @@ -0,0 +1,126 @@ +//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Block Frequency class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; + +namespace { + +/// mult96bit - Multiply FREQ by N and store result in W array. +void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) { + uint64_t u0 = freq & UINT32_MAX; + uint64_t u1 = freq >> 32; + + // Represent 96-bit value as w[2]:w[1]:w[0]; + uint32_t w[3] = { 0, 0, 0 }; + + uint64_t t = u0 * N; + uint64_t k = t >> 32; + w[0] = t; + t = u1 * N + k; + w[1] = t; + w[2] = t >> 32; + + // W[1] - higher bits. + // W[0] - lower bits. + W[0] = w[0] + ((uint64_t) w[1] << 32); + W[1] = w[2]; +} + + +/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency. 
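// Worked example for the mult96bit/div96bit pair (values chosen so the plain
// 64-bit multiply would overflow): scaling Frequency = 2^63 by the probability
// 3/4. mult96bit(2^63, 3, W) produces the 96-bit product 3 * 2^63, i.e.
// W[1] = 0x1 and W[0] = 0x8000000000000000; div96bit(W, 4) then returns
// 3 * 2^61 = 0x6000000000000000, the exact value of 2^63 * 3 / 4, which still
// fits in 64 bits because the probability is at most 1.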
+uint64_t div96bit(uint64_t W[2], uint32_t D) { + uint64_t y = W[0]; + uint64_t x = W[1]; + int i; + + for (i = 1; i <= 64 && x; ++i) { + uint32_t t = (int)x >> 31; + x = (x << 1) | (y >> 63); + y = y << 1; + if ((x | t) >= D) { + x -= D; + ++y; + } + } + + return y << (64 - i + 1); +} + +} + + +BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { + uint32_t n = Prob.getNumerator(); + uint32_t d = Prob.getDenominator(); + + assert(n <= d && "Probability must be less or equal to 1."); + + // If we can overflow use 96-bit operations. + if (n > 0 && Frequency > UINT64_MAX / n) { + // 96-bit value represented as W[1]:W[0]. + uint64_t W[2]; + + // Probability is less or equal to 1 which means that results must fit + // 64-bit. + mult96bit(Frequency, n, W); + Frequency = div96bit(W, d); + return *this; + } + + Frequency *= n; + Frequency /= d; + return *this; +} + +const BlockFrequency +BlockFrequency::operator*(const BranchProbability &Prob) const { + BlockFrequency Freq(Frequency); + Freq *= Prob; + return Freq; +} + +BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { + uint64_t Before = Freq.Frequency; + Frequency += Freq.Frequency; + + // If overflow, set frequency to the maximum value. + if (Frequency < Before) + Frequency = UINT64_MAX; + + return *this; +} + +const BlockFrequency +BlockFrequency::operator+(const BlockFrequency &Prob) const { + BlockFrequency Freq(Frequency); + Freq += Prob; + return Freq; +} + +void BlockFrequency::print(raw_ostream &OS) const { + OS << Frequency; +} + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) { + Freq.print(OS); + return OS; +} + +} diff --git a/contrib/llvm/lib/Support/BranchProbability.cpp b/contrib/llvm/lib/Support/BranchProbability.cpp index 97342da..49d04ed 100644 --- a/contrib/llvm/lib/Support/BranchProbability.cpp +++ b/contrib/llvm/lib/Support/BranchProbability.cpp @@ -24,9 +24,8 @@ BranchProbability::BranchProbability(uint32_t n, uint32_t d) { D = d; } -raw_ostream &BranchProbability::print(raw_ostream &OS) const { +void BranchProbability::print(raw_ostream &OS) const { OS << N << " / " << D << " = " << ((double)N / D); - return OS; } void BranchProbability::dump() const { diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp index 92c60a9..238adcc 100644 --- a/contrib/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm/lib/Support/CommandLine.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Host.h" #include "llvm/Support/Path.h" #include "llvm/ADT/OwningPtr.h" @@ -45,6 +44,7 @@ TEMPLATE_INSTANTIATION(class basic_parser<bool>); TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>); TEMPLATE_INSTANTIATION(class basic_parser<int>); TEMPLATE_INSTANTIATION(class basic_parser<unsigned>); +TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>); TEMPLATE_INSTANTIATION(class basic_parser<double>); TEMPLATE_INSTANTIATION(class basic_parser<float>); TEMPLATE_INSTANTIATION(class basic_parser<std::string>); @@ -63,6 +63,7 @@ void parser<bool>::anchor() {} void parser<boolOrDefault>::anchor() {} void parser<int>::anchor() {} void parser<unsigned>::anchor() {} +void parser<unsigned long long>::anchor() {} void parser<double>::anchor() {} void parser<float>::anchor() {} void parser<std::string>::anchor() {} @@ -1006,6 +1007,16 @@ bool parser<unsigned>::parse(Option 
&O, StringRef ArgName, return false; } +// parser<unsigned long long> implementation +// +bool parser<unsigned long long>::parse(Option &O, StringRef ArgName, + StringRef Arg, unsigned long long &Value){ + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); + return false; +} + // parser<double>/parser<float> implementation // static bool parseDouble(Option &O, StringRef Arg, double &Value) { @@ -1151,6 +1162,7 @@ PRINT_OPT_DIFF(bool) PRINT_OPT_DIFF(boolOrDefault) PRINT_OPT_DIFF(int) PRINT_OPT_DIFF(unsigned) +PRINT_OPT_DIFF(unsigned long long) PRINT_OPT_DIFF(double) PRINT_OPT_DIFF(float) PRINT_OPT_DIFF(char) @@ -1330,10 +1342,7 @@ void cl::PrintOptionValues() { static void (*OverrideVersionPrinter)() = 0; -static int TargetArraySortFn(const void *LHS, const void *RHS) { - typedef std::pair<const char *, const Target*> pair_ty; - return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first); -} +static std::vector<void (*)()>* ExtraVersionPrinters = 0; namespace { class VersionPrinter { @@ -1357,41 +1366,31 @@ public: std::string CPU = sys::getHostCPUName(); if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" -#if defined(ENABLE_TIMESTAMPS) && ENABLE_TIMESTAMPS == 1 +#if (ENABLE_TIMESTAMPS == 1) << " Built " << __DATE__ << " (" << __TIME__ << ").\n" #endif << " Host: " << sys::getHostTriple() << '\n' - << " Host CPU: " << CPU << '\n' - << '\n' - << " Registered Targets:\n"; - - std::vector<std::pair<const char *, const Target*> > Targets; - size_t Width = 0; - for (TargetRegistry::iterator it = TargetRegistry::begin(), - ie = TargetRegistry::end(); it != ie; ++it) { - Targets.push_back(std::make_pair(it->getName(), &*it)); - Width = std::max(Width, strlen(Targets.back().first)); - } - if (!Targets.empty()) - qsort(&Targets[0], Targets.size(), sizeof(Targets[0]), - TargetArraySortFn); - - for (unsigned i = 0, e = Targets.size(); i != e; ++i) { - OS << " " << Targets[i].first; - OS.indent(Width - strlen(Targets[i].first)) << " - " - << Targets[i].second->getShortDescription() << '\n'; - } - if (Targets.empty()) - OS << " (none)\n"; + << " Host CPU: " << CPU << '\n'; } void operator=(bool OptionWasSpecified) { if (!OptionWasSpecified) return; - if (OverrideVersionPrinter == 0) { - print(); + if (OverrideVersionPrinter != 0) { + (*OverrideVersionPrinter)(); exit(1); } - (*OverrideVersionPrinter)(); + print(); + + // Iterate over any registered extra printers and call them to add further + // information. 
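    // For reference, a client hooks into this via the cl::AddExtraVersionPrinter
    // entry point defined later in this file, e.g. (hypothetical tool code):
    //   static void printMyExtraVersionInfo() {
    //     llvm::outs() << "  MyTool backend: ...\n";
    //   }
    //   llvm::cl::AddExtraVersionPrinter(printMyExtraVersionInfo);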
+ if (ExtraVersionPrinters != 0) { + outs() << '\n'; + for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(), + E = ExtraVersionPrinters->end(); + I != E; ++I) + (*I)(); + } + exit(1); } }; @@ -1424,3 +1423,10 @@ void cl::PrintVersionMessage() { void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; } + +void cl::AddExtraVersionPrinter(void (*func)()) { + if (ExtraVersionPrinters == 0) + ExtraVersionPrinters = new std::vector<void (*)()>; + + ExtraVersionPrinters->push_back(func); +} diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp index 81382d0..c29cb53 100644 --- a/contrib/llvm/lib/Support/ConstantRange.cpp +++ b/contrib/llvm/lib/Support/ConstantRange.cpp @@ -21,11 +21,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" +#include "llvm/InstrTypes.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Instructions.h" using namespace llvm; /// Initialize a full (the default) or empty set for the specified type. @@ -56,56 +55,56 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, uint32_t W = CR.getBitWidth(); switch (Pred) { - default: assert(!"Invalid ICmp predicate to makeICmpRegion()"); - case ICmpInst::ICMP_EQ: + default: assert(0 && "Invalid ICmp predicate to makeICmpRegion()"); + case CmpInst::ICMP_EQ: return CR; - case ICmpInst::ICMP_NE: + case CmpInst::ICMP_NE: if (CR.isSingleElement()) return ConstantRange(CR.getUpper(), CR.getLower()); return ConstantRange(W); - case ICmpInst::ICMP_ULT: { + case CmpInst::ICMP_ULT: { APInt UMax(CR.getUnsignedMax()); if (UMax.isMinValue()) return ConstantRange(W, /* empty */ false); return ConstantRange(APInt::getMinValue(W), UMax); } - case ICmpInst::ICMP_SLT: { + case CmpInst::ICMP_SLT: { APInt SMax(CR.getSignedMax()); if (SMax.isMinSignedValue()) return ConstantRange(W, /* empty */ false); return ConstantRange(APInt::getSignedMinValue(W), SMax); } - case ICmpInst::ICMP_ULE: { + case CmpInst::ICMP_ULE: { APInt UMax(CR.getUnsignedMax()); if (UMax.isMaxValue()) return ConstantRange(W); return ConstantRange(APInt::getMinValue(W), UMax + 1); } - case ICmpInst::ICMP_SLE: { + case CmpInst::ICMP_SLE: { APInt SMax(CR.getSignedMax()); if (SMax.isMaxSignedValue()) return ConstantRange(W); return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); } - case ICmpInst::ICMP_UGT: { + case CmpInst::ICMP_UGT: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMaxValue()) return ConstantRange(W, /* empty */ false); return ConstantRange(UMin + 1, APInt::getNullValue(W)); } - case ICmpInst::ICMP_SGT: { + case CmpInst::ICMP_SGT: { APInt SMin(CR.getSignedMin()); if (SMin.isMaxSignedValue()) return ConstantRange(W, /* empty */ false); return ConstantRange(SMin + 1, APInt::getSignedMinValue(W)); } - case ICmpInst::ICMP_UGE: { + case CmpInst::ICMP_UGE: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMinValue()) return ConstantRange(W); return ConstantRange(UMin, APInt::getNullValue(W)); } - case ICmpInst::ICMP_SGE: { + case CmpInst::ICMP_SGE: { APInt SMin(CR.getSignedMin()); if (SMin.isMinSignedValue()) return ConstantRange(W); diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp index 899c389..263114c 100644 --- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp @@ -12,6 +12,7 @@ #include "llvm/Config/config.h" #include 
"llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" +#include "llvm/Support/ErrorHandling.h" #include <setjmp.h> #include <cstdio> using namespace llvm; @@ -123,7 +124,56 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { #ifdef LLVM_ON_WIN32 -// FIXME: No real Win32 implementation currently. +#include "Windows/Windows.h" + +// On Windows, we can make use of vectored exception handling to +// catch most crashing situations. Note that this does mean +// we will be alerted of exceptions *before* structured exception +// handling has the opportunity to catch it. But that isn't likely +// to cause problems because nowhere in the project is SEH being +// used. +// +// Vectored exception handling is built on top of SEH, and so it +// works on a per-thread basis. +// +// The vectored exception handler functionality was added in Windows +// XP, so if support for older versions of Windows is required, +// it will have to be added. +// +// If we want to support as far back as Win2k, we could use the +// SetUnhandledExceptionFilter API, but there's a risk of that +// being entirely overwritten (it's not a chain). + +static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) +{ + // Lookup the current thread local recovery object. + const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + + if (!CRCI) { + // Something has gone horribly wrong, so let's just tell everyone + // to keep searching + CrashRecoveryContext::Disable(); + return EXCEPTION_CONTINUE_SEARCH; + } + + // TODO: We can capture the stack backtrace here and store it on the + // implementation if we so choose. + + // Handle the crash + const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash(); + + // Note that we don't actually get here because HandleCrash calls + // longjmp, which means the HandleCrash function never returns. + llvm_unreachable("Handled the crash, should have longjmp'ed out of here"); + return EXCEPTION_CONTINUE_SEARCH; +} + +// Because the Enable and Disable calls are static, it means that +// there may not actually be an Impl available, or even a current +// CrashRecoveryContext at all. So we make use of a thread-local +// exception table. The handles contained in here will either be +// non-NULL, valid VEH handles, or NULL. +static sys::ThreadLocal<const void> sCurrentExceptionHandle; void CrashRecoveryContext::Enable() { sys::ScopedLock L(gCrashRecoveryContexMutex); @@ -132,6 +182,13 @@ void CrashRecoveryContext::Enable() { return; gCrashRecoveryEnabled = true; + + // We can set up vectored exception handling now. We will install our + // handler as the front of the list, though there's no assurances that + // it will remain at the front (another call could install itself before + // our handler). This 1) isn't likely, and 2) shouldn't cause problems. + PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler); + sCurrentExceptionHandle.set(handle); } void CrashRecoveryContext::Disable() { @@ -141,6 +198,15 @@ void CrashRecoveryContext::Disable() { return; gCrashRecoveryEnabled = false; + + PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get()); + if (currentHandle) { + // Now we can remove the vectored exception handler from the chain + ::RemoveVectoredExceptionHandler(currentHandle); + + // Reset the handle in our thread-local set. 
+ sCurrentExceptionHandle.set(NULL); + } } #else diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp new file mode 100644 index 0000000..b946c1d --- /dev/null +++ b/contrib/llvm/lib/Support/DataExtractor.cpp @@ -0,0 +1,175 @@ +//===-- DataExtractor.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" +using namespace llvm; + +template <typename T> +static T getU(uint32_t *offset_ptr, const DataExtractor *de, + bool isLittleEndian, const char *Data) { + T val = 0; + uint32_t offset = *offset_ptr; + if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::isLittleEndianHost() != isLittleEndian) + val = sys::SwapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); + } + return val; +} + +template <typename T> +static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, + const DataExtractor *de, bool isLittleEndian, const char *Data){ + uint32_t offset = *offset_ptr; + + if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; + } + return NULL; +} + +uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { + return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint8_t * +DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { + return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + + +uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { + return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, + uint32_t count) const { + return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + +uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { + return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, + uint32_t count) const { + return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data());; +} + +uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { + return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, + uint32_t count) const { + return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + +uint64_t +DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { + switch (byte_size) { + case 1: + return getU8(offset_ptr); + case 2: + return getU16(offset_ptr); + case 4: + return getU32(offset_ptr); + case 8: + return getU64(offset_ptr); + } + llvm_unreachable("getUnsigned unhandled case!"); +} + +int64_t +DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { 
+ switch (byte_size) { + case 1: + return (int8_t)getU8(offset_ptr); + case 2: + return (int16_t)getU16(offset_ptr); + case 4: + return (int32_t)getU32(offset_ptr); + case 8: + return (int64_t)getU64(offset_ptr); + } + llvm_unreachable("getSigned unhandled case!"); +} + +const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { + uint32_t offset = *offset_ptr; + StringRef::size_type pos = Data.find('\0', offset); + if (pos != StringRef::npos) { + *offset_ptr = pos + 1; + return Data.data() + offset; + } + return NULL; +} + +uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { + uint64_t result = 0; + if (Data.empty()) + return 0; + + unsigned shift = 0; + uint32_t offset = *offset_ptr; + uint8_t byte = 0; + + while (isValidOffset(offset)) { + byte = Data[offset++]; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) + break; + } + + *offset_ptr = offset; + return result; +} + +int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { + int64_t result = 0; + if (Data.empty()) + return 0; + + unsigned shift = 0; + uint32_t offset = *offset_ptr; + uint8_t byte = 0; + + while (isValidOffset(offset)) { + byte = Data[offset++]; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) + break; + } + + // Sign bit of byte is 2nd high order bit (0x40) + if (shift < 64 && (byte & 0x40)) + result |= -(1 << shift); + + *offset_ptr = offset; + return result; +} diff --git a/contrib/llvm/lib/Support/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp index 6362aff..c6d73bc 100644 --- a/contrib/llvm/lib/Support/Disassembler.cpp +++ b/contrib/llvm/lib/Support/Disassembler.cpp @@ -1,4 +1,4 @@ -//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===// +//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp index 0813321..95a9550 100644 --- a/contrib/llvm/lib/Support/Dwarf.cpp +++ b/contrib/llvm/lib/Support/Dwarf.cpp @@ -82,6 +82,19 @@ const char *llvm::dwarf::TagString(unsigned Tag) { case DW_TAG_arg_variable: return "DW_TAG_arg_variable"; case DW_TAG_return_variable: return "DW_TAG_return_variable"; case DW_TAG_vector_type: return "DW_TAG_vector_type"; + case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type"; + case DW_TAG_template_alias: return "DW_TAG_template_alias"; + case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop"; + case DW_TAG_type_unit: return "DW_TAG_type_unit"; + case DW_TAG_format_label: return "DW_TAG_format_label"; + case DW_TAG_function_template: return "DW_TAG_function_template"; + case DW_TAG_class_template: return "DW_TAG_class_template"; + case DW_TAG_GNU_template_template_param: + return "DW_TAG_GNU_template_template_param"; + case DW_TAG_GNU_template_parameter_pack: + return "DW_TAG_GNU_template_parameter_pack"; + case DW_TAG_GNU_formal_parameter_pack: + return "DW_TAG_GNU_formal_parameter_pack"; } return 0; } @@ -186,7 +199,30 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_elemental: return "DW_AT_elemental"; case DW_AT_pure: return "DW_AT_pure"; case DW_AT_recursive: return "DW_AT_recursive"; + case DW_AT_signature: return "DW_AT_signature"; + case DW_AT_main_subprogram: return "DW_AT_main_subprogram"; + case DW_AT_data_bit_offset: return "DW_AT_data_bit_offset"; + case DW_AT_const_expr: return "DW_AT_const_expr"; + case DW_AT_enum_class: return "DW_AT_enum_class"; + case 
DW_AT_linkage_name: return "DW_AT_linkage_name"; + case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin"; + case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin"; + case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin"; + case DW_AT_MIPS_loop_unroll_factor: return "DW_AT_MIPS_loop_unroll_factor"; + case DW_AT_MIPS_software_pipeline_depth: + return "DW_AT_MIPS_software_pipeline_depth"; case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name"; + case DW_AT_MIPS_stride: return "DW_AT_MIPS_stride"; + case DW_AT_MIPS_abstract_name: return "DW_AT_MIPS_abstract_name"; + case DW_AT_MIPS_clone_origin: return "DW_AT_MIPS_clone_origin"; + case DW_AT_MIPS_has_inlines: return "DW_AT_MIPS_has_inlines"; + case DW_AT_MIPS_stride_byte: return "DW_AT_MIPS_stride_byte"; + case DW_AT_MIPS_stride_elem: return "DW_AT_MIPS_stride_elem"; + case DW_AT_MIPS_ptr_dopetype: return "DW_AT_MIPS_ptr_dopetype"; + case DW_AT_MIPS_allocatable_dopetype: + return "DW_AT_MIPS_allocatable_dopetype"; + case DW_AT_MIPS_assumed_shape_dopetype: + return "DW_AT_MIPS_assumed_shape_dopetype"; case DW_AT_sf_names: return "DW_AT_sf_names"; case DW_AT_src_info: return "DW_AT_src_info"; case DW_AT_mac_info: return "DW_AT_mac_info"; @@ -194,6 +230,8 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_body_begin: return "DW_AT_body_begin"; case DW_AT_body_end: return "DW_AT_body_end"; case DW_AT_GNU_vector: return "DW_AT_GNU_vector"; + case DW_AT_GNU_template_name: return "DW_AT_GNU_template_name"; + case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size"; case DW_AT_lo_user: return "DW_AT_lo_user"; case DW_AT_hi_user: return "DW_AT_hi_user"; case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized"; @@ -237,6 +275,10 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { case DW_FORM_ref8: return "DW_FORM_ref8"; case DW_FORM_ref_udata: return "DW_FORM_ref_udata"; case DW_FORM_indirect: return "DW_FORM_indirect"; + case DW_FORM_sec_offset: return "DW_FORM_sec_offset"; + case DW_FORM_exprloc: return "DW_FORM_exprloc"; + case DW_FORM_flag_present: return "DW_FORM_flag_present"; + case DW_FORM_ref_sig8: return "DW_FORM_ref_sig8"; } return 0; } @@ -397,6 +439,8 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; case DW_OP_bit_piece: return "DW_OP_bit_piece"; + case DW_OP_implicit_value: return "DW_OP_implicit_value"; + case DW_OP_stack_value: return "DW_OP_stack_value"; case DW_OP_lo_user: return "DW_OP_lo_user"; case DW_OP_hi_user: return "DW_OP_hi_user"; } @@ -416,6 +460,7 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { case DW_ATE_unsigned: return "DW_ATE_unsigned"; case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char"; case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE_UTF: return "DW_ATE_UTF"; case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal"; case DW_ATE_numeric_string: return "DW_ATE_numeric_string"; case DW_ATE_edited: return "DW_ATE_edited"; @@ -602,6 +647,7 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { case DW_LNE_end_sequence: return "DW_LNE_end_sequence"; case DW_LNE_set_address: return "DW_LNE_set_address"; case DW_LNE_define_file: return "DW_LNE_define_file"; + case DW_LNE_set_discriminator: return "DW_LNE_set_discriminator"; case DW_LNE_lo_user: return "DW_LNE_lo_user"; case DW_LNE_hi_user: return "DW_LNE_hi_user"; } @@ -651,6 
+697,9 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) { case DW_CFA_val_offset: return "DW_CFA_val_offset"; case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf"; case DW_CFA_val_expression: return "DW_CFA_val_expression"; + case DW_CFA_MIPS_advance_loc8: return "DW_CFA_MIPS_advance_loc8"; + case DW_CFA_GNU_window_save: return "DW_CFA_GNU_window_save"; + case DW_CFA_GNU_args_size: return "DW_CFA_GNU_args_size"; case DW_CFA_lo_user: return "DW_CFA_lo_user"; case DW_CFA_hi_user: return "DW_CFA_hi_user"; } diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp index 455c380..fb02c07 100644 --- a/contrib/llvm/lib/Support/DynamicLibrary.cpp +++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp @@ -9,28 +9,26 @@ // // This header file implements the operating system DynamicLibrary concept. // -// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is -// not thread safe! +// FIXME: This file leaks ExplicitSymbols and OpenedHandles! // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Mutex.h" #include "llvm/Config/config.h" #include <cstdio> #include <cstring> -#include <map> -#include <vector> // Collection of symbol name/value pairs to be searched prior to any libraries. -static std::map<std::string, void*> *ExplicitSymbols = 0; +static llvm::StringMap<void *> *ExplicitSymbols = 0; namespace { struct ExplicitSymbolsDeleter { ~ExplicitSymbolsDeleter() { - if (ExplicitSymbols) - delete ExplicitSymbols; + delete ExplicitSymbols; } }; @@ -38,13 +36,22 @@ struct ExplicitSymbolsDeleter { static ExplicitSymbolsDeleter Dummy; -void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, + +static llvm::sys::SmartMutex<true>& getMutex() { + static llvm::sys::SmartMutex<true> HandlesMutex; + return HandlesMutex; +} + +void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, void *symbolValue) { + SmartScopedLock<true> lock(getMutex()); if (ExplicitSymbols == 0) - ExplicitSymbols = new std::map<std::string, void*>(); + ExplicitSymbols = new llvm::StringMap<void*>(); (*ExplicitSymbols)[symbolName] = symbolValue; } +char llvm::sys::DynamicLibrary::Invalid = 0; + #ifdef LLVM_ON_WIN32 #include "Windows/DynamicLibrary.inc" @@ -61,66 +68,78 @@ using namespace llvm::sys; //=== independent code. //===----------------------------------------------------------------------===// -static std::vector<void *> *OpenedHandles = 0; - - -static SmartMutex<true>& getMutex() { - static SmartMutex<true> HandlesMutex; - return HandlesMutex; -} +static DenseSet<void *> *OpenedHandles = 0; +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, + std::string *errMsg) { + SmartScopedLock<true> lock(getMutex()); -bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, - std::string *ErrMsg) { - void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL); - if (H == 0) { - if (ErrMsg) *ErrMsg = dlerror(); - return true; + void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); + if (handle == 0) { + if (errMsg) *errMsg = dlerror(); + return DynamicLibrary(); } + #ifdef __CYGWIN__ // Cygwin searches symbols only in the main // with the handle of dlopen(NULL, RTLD_GLOBAL). 
- if (Filename == NULL) - H = RTLD_DEFAULT; + if (filename == NULL) + handle = RTLD_DEFAULT; #endif - SmartScopedLock<true> Lock(getMutex()); + if (OpenedHandles == 0) - OpenedHandles = new std::vector<void *>(); - OpenedHandles->push_back(H); - return false; + OpenedHandles = new DenseSet<void *>(); + + // If we've already loaded this library, dlclose() the handle in order to + // keep the internal refcount at +1. + if (!OpenedHandles->insert(handle).second) + dlclose(handle); + + return DynamicLibrary(handle); +} + +void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { + if (!isValid()) + return NULL; + return dlsym(Data, symbolName); } + #else using namespace llvm; using namespace llvm::sys; -bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, - std::string *ErrMsg) { - if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform"; - return true; +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, + std::string *errMsg) { + if (errMsg) *errMsg = "dlopen() not supported on this platform"; + return DynamicLibrary(); } + +void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { + return NULL; +} + #endif namespace llvm { void *SearchForAddressOfSpecialSymbol(const char* symbolName); } -void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { +void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { + SmartScopedLock<true> Lock(getMutex()); + // First check symbols added via AddSymbol(). if (ExplicitSymbols) { - std::map<std::string, void *>::iterator I = - ExplicitSymbols->find(symbolName); - std::map<std::string, void *>::iterator E = ExplicitSymbols->end(); + StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName); - if (I != E) - return I->second; + if (i != ExplicitSymbols->end()) + return i->second; } #if HAVE_DLFCN_H // Now search the libraries. - SmartScopedLock<true> Lock(getMutex()); if (OpenedHandles) { - for (std::vector<void *>::iterator I = OpenedHandles->begin(), + for (DenseSet<void *>::iterator I = OpenedHandles->begin(), E = OpenedHandles->end(); I != E; ++I) { //lt_ptr ptr = lt_dlsym(*I, symbolName); void *ptr = dlsym(*I, symbolName); diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp index 1568342..17b8271 100644 --- a/contrib/llvm/lib/Support/FoldingSet.cpp +++ b/contrib/llvm/lib/Support/FoldingSet.cpp @@ -64,10 +64,8 @@ void FoldingSetNodeID::AddPointer(const void *Ptr) { // depend on the host. It doesn't matter however, because hashing on // pointer values in inherently unstable. Nothing should depend on the // ordering of nodes in the folding set. - intptr_t PtrI = (intptr_t)Ptr; - Bits.push_back(unsigned(PtrI)); - if (sizeof(intptr_t) > sizeof(unsigned)) - Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); + Bits.append(reinterpret_cast<unsigned *>(&Ptr), + reinterpret_cast<unsigned *>(&Ptr+1)); } void FoldingSetNodeID::AddInteger(signed I) { Bits.push_back(I); diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index c525a12..a19e4b4 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -213,13 +213,13 @@ std::string sys::getHostCPUName() { case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. case 37: // Intel Core i7, laptop version. + case 44: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. 
return "corei7"; // SandyBridge: case 42: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. - case 44: // Intel Core i7 processor and Intel Xeon processor. All - // processors are manufactured using the 32 nm process. case 45: return "corei7-avx"; diff --git a/contrib/llvm/lib/Support/IncludeFile.cpp b/contrib/llvm/lib/Support/IncludeFile.cpp index 5da8826..e67acb3 100644 --- a/contrib/llvm/lib/Support/IncludeFile.cpp +++ b/contrib/llvm/lib/Support/IncludeFile.cpp @@ -1,4 +1,4 @@ -//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===// +//===- lib/Support/IncludeFile.cpp - Ensure Linking Of Implementation -----===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Support/Memory.cpp b/contrib/llvm/lib/Support/Memory.cpp index ac7af0a..ceaaa99 100644 --- a/contrib/llvm/lib/Support/Memory.cpp +++ b/contrib/llvm/lib/Support/Memory.cpp @@ -16,6 +16,10 @@ #include "llvm/Support/Valgrind.h" #include "llvm/Config/config.h" +#if defined(__mips__) +#include <sys/cachectl.h> +#endif + namespace llvm { using namespace sys; } @@ -66,6 +70,8 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, char *Start = (char*) Addr; char *End = Start + Len; __clear_cache(Start, End); +# elif defined(__mips__) + cacheflush((char*)Addr, Len, BCACHE); # endif #endif // end apple diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp index d264be9..0771af5 100644 --- a/contrib/llvm/lib/Support/MemoryBuffer.cpp +++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp @@ -275,16 +275,16 @@ static bool shouldUseMmap(int FD, error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, OwningPtr<MemoryBuffer> &result, - size_t FileSize, size_t MapSize, - off_t Offset, + uint64_t FileSize, uint64_t MapSize, + int64_t Offset, bool RequiresNullTerminator) { static int PageSize = sys::Process::GetPageSize(); // Default is to map the full file. - if (MapSize == size_t(-1)) { + if (MapSize == uint64_t(-1)) { // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. - if (FileSize == size_t(-1)) { + if (FileSize == uint64_t(-1)) { struct stat FileInfo; // TODO: This should use fstat64 when available. 
if (fstat(FD, &FileInfo) == -1) { diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp index 91e3ecd..b20ab89 100644 --- a/contrib/llvm/lib/Support/MemoryObject.cpp +++ b/contrib/llvm/lib/Support/MemoryObject.cpp @@ -19,8 +19,11 @@ int MemoryObject::readBytes(uint64_t address, uint64_t* copied) const { uint64_t current = address; uint64_t limit = getBase() + getExtent(); - - while (current - address < size && current < limit) { + + if (current + size > limit) + return -1; + + while (current - address < size) { if (readByte(current, &buf[(current - address)])) return -1; diff --git a/contrib/llvm/lib/Support/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp index b408973..8874e94 100644 --- a/contrib/llvm/lib/Support/Mutex.cpp +++ b/contrib/llvm/lib/Support/Mutex.cpp @@ -152,6 +152,6 @@ MutexImpl::tryacquire() #elif defined( LLVM_ON_WIN32) #include "Windows/Mutex.inc" #else -#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp #endif #endif diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp index 8fbaf2d..e5b7cd3 100644 --- a/contrib/llvm/lib/Support/Path.cpp +++ b/contrib/llvm/lib/Support/Path.cpp @@ -121,7 +121,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case 7: return Mach_O_DynamicLinker_FileType; case 8: return Mach_O_Bundle_FileType; case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType; - case 10: break; // FIXME: MH_DSYM companion file with only debug. + case 10: return Mach_O_DSYMCompanion_FileType; } break; } diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp index 896c94c..bebe442 100644 --- a/contrib/llvm/lib/Support/PathV2.cpp +++ b/contrib/llvm/lib/Support/PathV2.cpp @@ -490,6 +490,36 @@ bool is_separator(char value) { } } +void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { + result.clear(); + + // Check whether the temporary directory is specified by an environment + // variable. + const char *EnvironmentVariable; +#ifdef LLVM_ON_WIN32 + EnvironmentVariable = "TEMP"; +#else + EnvironmentVariable = "TMPDIR"; +#endif + if (char *RequestedDir = getenv(EnvironmentVariable)) { + result.append(RequestedDir, RequestedDir + strlen(RequestedDir)); + return; + } + + // Fall back to a system default. 
+ const char *DefaultResult; +#ifdef LLVM_ON_WIN32 + (void)erasedOnReboot; + DefaultResult = "C:\\TEMP"; +#else + if (erasedOnReboot) + DefaultResult = "/tmp"; + else + DefaultResult = "/var/tmp"; +#endif + result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); +} + bool has_root_name(const Twine &path) { SmallString<128> path_storage; StringRef p = path.toStringRef(path_storage); @@ -626,7 +656,7 @@ error_code create_directories(const Twine &path, bool &existed) { if (error_code ec = fs::exists(parent, parent_exists)) return ec; if (!parent_exists) - return create_directories(parent, existed); + if (error_code ec = create_directories(parent, existed)) return ec; return create_directory(p, existed); } @@ -682,14 +712,12 @@ bool is_other(file_status status) { !is_symlink(status); } -void directory_entry::replace_filename(const Twine &filename, file_status st, - file_status symlink_st) { +void directory_entry::replace_filename(const Twine &filename, file_status st) { SmallString<128> path(Path.begin(), Path.end()); path::remove_filename(path); path::append(path, filename); Path = path.str(); Status = st; - SymlinkStatus = symlink_st; } error_code has_magic(const Twine &path, const Twine &magic, bool &result) { diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp index 082b701..ef33073 100644 --- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp +++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp @@ -62,7 +62,7 @@ extern "C" { CRASH_REPORTER_CLIENT_HIDDEN struct crashreporter_annotations_t gCRAnnotations __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) - = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 }; + = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 }; } #elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO static const char *__crashreporter_info__ = 0; diff --git a/contrib/llvm/lib/Support/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp index fc02f9c..d0b1e10 100644 --- a/contrib/llvm/lib/Support/RWMutex.cpp +++ b/contrib/llvm/lib/Support/RWMutex.cpp @@ -152,6 +152,6 @@ RWMutexImpl::writer_release() #elif defined( LLVM_ON_WIN32) #include "Windows/RWMutex.inc" #else -#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp #endif #endif diff --git a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp index d638301..2d23902 100644 --- a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp +++ b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp @@ -28,21 +28,6 @@ static void *DoSearch(const char* symbolName) { #ifdef __APPLE__ { - EXPLICIT_SYMBOL(__ashldi3); - EXPLICIT_SYMBOL(__ashrdi3); - EXPLICIT_SYMBOL(__cmpdi2); - EXPLICIT_SYMBOL(__divdi3); - EXPLICIT_SYMBOL(__fixdfdi); - EXPLICIT_SYMBOL(__fixsfdi); - EXPLICIT_SYMBOL(__fixunsdfdi); - EXPLICIT_SYMBOL(__fixunssfdi); - EXPLICIT_SYMBOL(__floatdidf); - EXPLICIT_SYMBOL(__floatdisf); - EXPLICIT_SYMBOL(__lshrdi3); - EXPLICIT_SYMBOL(__moddi3); - EXPLICIT_SYMBOL(__udivdi3); - EXPLICIT_SYMBOL(__umoddi3); - // __eprintf is sometimes used for assert() handling on x86. 
// // FIXME: Currently disabled when using Clang, as we don't always have our diff --git a/contrib/llvm/lib/Support/StringExtras.cpp b/contrib/llvm/lib/Support/StringExtras.cpp index eb2fa08..49c5ac4 100644 --- a/contrib/llvm/lib/Support/StringExtras.cpp +++ b/contrib/llvm/lib/Support/StringExtras.cpp @@ -51,11 +51,10 @@ std::pair<StringRef, StringRef> llvm::getToken(StringRef Source, void llvm::SplitString(StringRef Source, SmallVectorImpl<StringRef> &OutFragments, StringRef Delimiters) { - StringRef S2, S; - tie(S2, S) = getToken(Source, Delimiters); - while (!S2.empty()) { - OutFragments.push_back(S2); - tie(S2, S) = getToken(S, Delimiters); + std::pair<StringRef, StringRef> S = getToken(Source, Delimiters); + while (!S.first.empty()) { + OutFragments.push_back(S.first); + S = getToken(S.second, Delimiters); } } diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp index 8c3fc09..b5b4f94 100644 --- a/contrib/llvm/lib/Support/StringRef.cpp +++ b/contrib/llvm/lib/Support/StringRef.cpp @@ -46,12 +46,12 @@ int StringRef::compare_lower(StringRef RHS) const { /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { - if (Data[I] == RHS.Data[I]) - continue; + // Check for sequences of digits. if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) { - // The longer sequence of numbers is larger. This doesn't really handle - // prefixed zeros well. - for (size_t J = I+1; J != E+1; ++J) { + // The longer sequence of numbers is considered larger. + // This doesn't really handle prefixed zeros well. + size_t J; + for (J = I + 1; J != E + 1; ++J) { bool ld = J < Length && ascii_isdigit(Data[J]); bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]); if (ld != rd) @@ -59,8 +59,15 @@ int StringRef::compare_numeric(StringRef RHS) const { if (!rd) break; } + // The two number sequences have the same length (J-I), just memcmp them. + if (int Res = compareMemory(Data + I, RHS.Data + I, J - I)) + return Res < 0 ? -1 : 1; + // Identical number sequences, continue search after the numbers. + I = J - 1; + continue; } - return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1; + if (Data[I] != RHS.Data[I]) + return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1; } if (Length == RHS.Length) return 0; diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp index 293a5d7..7497bfe 100644 --- a/contrib/llvm/lib/Support/TargetRegistry.cpp +++ b/contrib/llvm/lib/Support/TargetRegistry.cpp @@ -7,9 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> +#include <vector> using namespace llvm; // Clients are responsible for avoid race conditions in registration. 
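// A small illustration of the StringRef::compare_numeric behaviour adjusted in
// the StringRef.cpp hunk above: runs of digits are compared by length first and
// then bytewise, so "a9" orders before "a10" even though '9' > '1'. Sketch
// only, not part of the change set; it assumes the usual LLVM headers.
#include "llvm/ADT/StringRef.h"
#include <cassert>

static void compareNumericExample() {
  llvm::StringRef A("a9"), B("a10"), C("a10");
  assert(A.compare_numeric(B) < 0);   // numeric compare: 9 < 10
  assert(A.compare(B) > 0);           // plain lexicographic compare: '9' > '1'
  assert(B.compare_numeric(C) == 0);  // identical strings compare equal
}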
@@ -90,3 +94,29 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { return TheTarget; } +static int TargetArraySortFn(const void *LHS, const void *RHS) { + typedef std::pair<StringRef, const Target*> pair_ty; + return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first); +} + +void TargetRegistry::printRegisteredTargetsForVersion() { + std::vector<std::pair<StringRef, const Target*> > Targets; + size_t Width = 0; + for (TargetRegistry::iterator I = TargetRegistry::begin(), + E = TargetRegistry::end(); + I != E; ++I) { + Targets.push_back(std::make_pair(I->getName(), &*I)); + Width = std::max(Width, Targets.back().first.size()); + } + array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn); + + raw_ostream &OS = outs(); + OS << " Registered Targets:\n"; + for (unsigned i = 0, e = Targets.size(); i != e; ++i) { + OS << " " << Targets[i].first; + OS.indent(Width - Targets[i].first.size()) << " - " + << Targets[i].second->getShortDescription() << '\n'; + } + if (Targets.empty()) + OS << " (none)\n"; +} diff --git a/contrib/llvm/lib/Support/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp index 6b43048..fdb251c 100644 --- a/contrib/llvm/lib/Support/ThreadLocal.cpp +++ b/contrib/llvm/lib/Support/ThreadLocal.cpp @@ -79,6 +79,6 @@ void ThreadLocalImpl::removeInstance() { #elif defined( LLVM_ON_WIN32) #include "Windows/ThreadLocal.inc" #else -#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 set in Support/ThreadLocal.cpp #endif #endif diff --git a/contrib/llvm/lib/Support/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp index b62b1a9..8f0bb93 100644 --- a/contrib/llvm/lib/Support/Threading.cpp +++ b/contrib/llvm/lib/Support/Threading.cpp @@ -24,7 +24,7 @@ static bool multithreaded_mode = false; static sys::Mutex* global_lock = 0; bool llvm::llvm_start_multithreaded() { -#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1 +#if ENABLE_THREADS != 0 assert(!multithreaded_mode && "Already multithreaded!"); multithreaded_mode = true; global_lock = new sys::Mutex(true); @@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() { } void llvm::llvm_stop_multithreaded() { -#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1 +#if ENABLE_THREADS != 0 assert(multithreaded_mode && "Not currently multithreaded!"); // We fence here to insure that all threaded operations are complete BEFORE we @@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() { if (multithreaded_mode) global_lock->release(); } -#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1 && defined(HAVE_PTHREAD_H) +#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include <pthread.h> struct ThreadInfo { @@ -102,13 +102,44 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, error: ::pthread_attr_destroy(&Attr); } +#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32) +#include "Windows/Windows.h" +#include <process.h> -#else +struct ThreadInfo { + void (*func)(void*); + void *param; +}; -// No non-pthread implementation, currently. 
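printRegisteredTargetsForVersion, added above, collects (name, Target*) pairs, sorts them, and pads every name to the widest one so the short descriptions line up in a column. The same collect, sort, and pad pattern in plain C++ (illustrative; the real function walks the registry iterators and uses array_pod_sort and raw_ostream::indent):

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Print "name - description" rows with the descriptions vertically aligned.
static void printAligned(std::vector<std::pair<std::string, std::string> > rows) {
  std::sort(rows.begin(), rows.end());          // sort by name
  size_t width = 0;
  for (size_t i = 0; i != rows.size(); ++i)
    width = std::max(width, rows[i].first.size());
  for (size_t i = 0; i != rows.size(); ++i)
    std::printf("    %-*s - %s\n", (int)width, rows[i].first.c_str(),
                rows[i].second.c_str());
  if (rows.empty())
    std::printf("    (none)\n");
}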
+static unsigned __stdcall ThreadCallback(void *param) { + struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param); + info->func(info->param); + + return 0; +} void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, unsigned RequestedStackSize) { + struct ThreadInfo param = { Fn, UserData }; + + HANDLE hThread = (HANDLE)::_beginthreadex(NULL, + RequestedStackSize, ThreadCallback, + &param, 0, NULL); + + if (hThread) { + // We actually don't care whether the wait succeeds or fails, in + // the same way we don't care whether the pthread_join call succeeds + // or fails. There's not much we could do if this were to fail. But + // on success, this call will wait until the thread finishes executing + // before returning. + (void)::WaitForSingleObject(hThread, INFINITE); + ::CloseHandle(hThread); + } +} +#else +// Support for non-Win32, non-pthread implementation. +void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, + unsigned RequestedStackSize) { (void) RequestedStackSize; Fn(UserData); } diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index 7e094ee..c61af37 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -8,16 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" - #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Twine.h" -#include <cassert> #include <cstring> using namespace llvm; -// - const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case InvalidArch: return "<invalid>"; @@ -29,6 +24,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case cellspu: return "cellspu"; case mips: return "mips"; case mipsel: return "mipsel"; + case mips64: return "mips64"; + case mips64el:return "mips64el"; case msp430: return "msp430"; case ppc64: return "powerpc64"; case ppc: return "powerpc"; @@ -43,6 +40,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case mblaze: return "mblaze"; case ptx32: return "ptx32"; case ptx64: return "ptx64"; + case le32: return "le32"; + case amdil: return "amdil"; } return "<invalid>"; @@ -77,6 +76,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case ptx32: return "ptx"; case ptx64: return "ptx"; + case le32: return "le32"; + case amdil: return "amdil"; } } @@ -102,6 +103,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; case IOS: return "ios"; + case KFreeBSD: return "kfreebsd"; case Linux: return "linux"; case Lv2: return "lv2"; case MacOSX: return "macosx"; @@ -114,6 +116,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Haiku: return "haiku"; case Minix: return "minix"; case RTEMS: return "rtems"; + case NativeClient: return "nacl"; } return "<invalid>"; @@ -144,10 +147,16 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return mips; if (Name == "mipsel") return mipsel; + if (Name == "mips64") + return mips64; + if (Name == "mips64el") + return mips64el; if (Name == "msp430") return msp430; if (Name == "ppc64") return ppc64; + if (Name == "ppc32") + return ppc; if (Name == "ppc") return ppc; if (Name == "mblaze") @@ -172,6 +181,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ptx32; if (Name == "ptx64") return ptx64; + if (Name == "le32") + return le32; + if (Name == "amdil") + return amdil; return UnknownArch; } @@ -207,13 +220,16 @@ Triple::ArchType
Triple::getArchTypeForDarwinArchName(StringRef Str) { // This is derived from the driver driver. if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" || - Str == "armv6" || Str == "armv7") + Str == "armv6" || Str == "armv7" || Str == "armv7f" || Str == "armv7k" || + Str == "armv7s") return Triple::arm; if (Str == "ptx32") return Triple::ptx32; if (Str == "ptx64") return Triple::ptx64; + if (Str == "amdil") + return Triple::amdil; return Triple::UnknownArch; } @@ -249,6 +265,10 @@ const char *Triple::getArchNameForAssembler() { return "ptx32"; if (Str == "ptx64") return "ptx64"; + if (Str == "le32") + return "le32"; + if (Str == "amdil") + return "amdil"; return NULL; } @@ -288,6 +308,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || ArchName == "psp") return mipsel; + else if (ArchName == "mips64" || ArchName == "mips64eb") + return mips64; + else if (ArchName == "mips64el") + return mips64el; else if (ArchName == "sparc") return sparc; else if (ArchName == "sparcv9") @@ -302,6 +326,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return ptx32; else if (ArchName == "ptx64") return ptx64; + else if (ArchName == "le32") + return le32; + else if (ArchName == "amdil") + return amdil; else return UnknownArch; } @@ -330,6 +358,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) { return FreeBSD; else if (OSName.startswith("ios")) return IOS; + else if (OSName.startswith("kfreebsd")) + return KFreeBSD; else if (OSName.startswith("linux")) return Linux; else if (OSName.startswith("lv2")) @@ -354,6 +384,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) { return Minix; else if (OSName.startswith("rtems")) return RTEMS; + else if (OSName.startswith("nacl")) + return NativeClient; else return UnknownOS; } diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp index d62123c..3d04bc3 100644 --- a/contrib/llvm/lib/Support/Twine.cpp +++ b/contrib/llvm/lib/Support/Twine.cpp @@ -16,7 +16,7 @@ using namespace llvm; std::string Twine::str() const { // If we're storing only a std::string, just return it. if (LHSKind == StdStringKind && RHSKind == EmptyKind) - return *static_cast<const std::string*>(LHS); + return *LHS.stdString; // Otherwise, flatten and copy the contents first. SmallString<256> Vec; @@ -40,9 +40,9 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const { switch (getLHSKind()) { case CStringKind: // Already null terminated, yay! 
- return StringRef(static_cast<const char*>(LHS)); + return StringRef(LHS.cString); case StdStringKind: { - const std::string *str = static_cast<const std::string*>(LHS); + const std::string *str = LHS.stdString; return StringRef(str->c_str(), str->size()); } default: @@ -55,48 +55,51 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const { return StringRef(Out.data(), Out.size()); } -void Twine::printOneChild(raw_ostream &OS, const void *Ptr, +void Twine::printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const { switch (Kind) { case Twine::NullKind: break; case Twine::EmptyKind: break; case Twine::TwineKind: - static_cast<const Twine*>(Ptr)->print(OS); + Ptr.twine->print(OS); break; case Twine::CStringKind: - OS << static_cast<const char*>(Ptr); + OS << Ptr.cString; break; case Twine::StdStringKind: - OS << *static_cast<const std::string*>(Ptr); + OS << *Ptr.stdString; break; case Twine::StringRefKind: - OS << *static_cast<const StringRef*>(Ptr); + OS << *Ptr.stringRef; + break; + case Twine::CharKind: + OS << Ptr.character; break; case Twine::DecUIKind: - OS << (unsigned)(uintptr_t)Ptr; + OS << Ptr.decUI; break; case Twine::DecIKind: - OS << (int)(intptr_t)Ptr; + OS << Ptr.decI; break; case Twine::DecULKind: - OS << *static_cast<const unsigned long*>(Ptr); + OS << *Ptr.decUL; break; case Twine::DecLKind: - OS << *static_cast<const long*>(Ptr); + OS << *Ptr.decL; break; case Twine::DecULLKind: - OS << *static_cast<const unsigned long long*>(Ptr); + OS << *Ptr.decULL; break; case Twine::DecLLKind: - OS << *static_cast<const long long*>(Ptr); + OS << *Ptr.decLL; break; case Twine::UHexKind: - OS.write_hex(*static_cast<const uint64_t*>(Ptr)); + OS.write_hex(*Ptr.uHex); break; } } -void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr, +void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr, NodeKind Kind) const { switch (Kind) { case Twine::NullKind: @@ -105,40 +108,43 @@ void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr, OS << "empty"; break; case Twine::TwineKind: OS << "rope:"; - static_cast<const Twine*>(Ptr)->printRepr(OS); + Ptr.twine->printRepr(OS); break; case Twine::CStringKind: OS << "cstring:\"" - << static_cast<const char*>(Ptr) << "\""; + << Ptr.cString << "\""; break; case Twine::StdStringKind: OS << "std::string:\"" - << static_cast<const std::string*>(Ptr) << "\""; + << Ptr.stdString << "\""; break; case Twine::StringRefKind: OS << "stringref:\"" - << static_cast<const StringRef*>(Ptr) << "\""; + << Ptr.stringRef << "\""; + break; + case Twine::CharKind: + OS << "char:\"" << Ptr.character << "\""; break; case Twine::DecUIKind: - OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\""; + OS << "decUI:\"" << Ptr.decUI << "\""; break; case Twine::DecIKind: - OS << "decI:\"" << (int)(intptr_t)Ptr << "\""; + OS << "decI:\"" << Ptr.decI << "\""; break; case Twine::DecULKind: - OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\""; + OS << "decUL:\"" << *Ptr.decUL << "\""; break; case Twine::DecLKind: - OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\""; + OS << "decL:\"" << *Ptr.decL << "\""; break; case Twine::DecULLKind: - OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\""; + OS << "decULL:\"" << *Ptr.decULL << "\""; break; case Twine::DecLLKind: - OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\""; + OS << "decLL:\"" << *Ptr.decLL << "\""; break; case Twine::UHexKind: - OS << "uhex:\"" << static_cast<const uint64_t*>(Ptr) << "\""; + OS << "uhex:\"" << Ptr.uHex << "\""; 
break; } } diff --git a/contrib/llvm/lib/Support/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc index eacb08b..1e438e7 100644 --- a/contrib/llvm/lib/Support/Unix/Host.inc +++ b/contrib/llvm/lib/Support/Unix/Host.inc @@ -22,6 +22,7 @@ #include <sys/utsname.h> #include <cctype> #include <string> +#include <cstdlib> // ::getenv using namespace llvm; diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc index f295b92..85c7c40 100644 --- a/contrib/llvm/lib/Support/Unix/Path.inc +++ b/contrib/llvm/lib/Support/Unix/Path.inc @@ -252,8 +252,8 @@ Path::GetUserHomeDirectory() { Path Path::GetCurrentDirectory() { char pathname[MAXPATHLEN]; - if (!getcwd(pathname,MAXPATHLEN)) { - assert (false && "Could not query current working directory."); + if (!getcwd(pathname, MAXPATHLEN)) { + assert(false && "Could not query current working directory."); return Path(); } diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc index 03ff283..bbbc344 100644 --- a/contrib/llvm/lib/Support/Unix/PathV2.inc +++ b/contrib/llvm/lib/Support/Unix/PathV2.inc @@ -42,6 +42,9 @@ #if HAVE_STDIO_H #include <stdio.h> #endif +#if HAVE_LIMITS_H +#include <limits.h> +#endif using namespace llvm; @@ -342,19 +345,22 @@ error_code status(const Twine &path, file_status &result) { } error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl<char> &result_path) { + SmallVectorImpl<char> &result_path, + bool makeAbsolute) { SmallString<128> Model; model.toVector(Model); // Null terminate. Model.c_str(); - // Make model absolute by prepending a temp directory if it's not already. - bool absolute = path::is_absolute(Twine(Model)); - if (!absolute) { - SmallString<128> TDir; - if (error_code ec = TempDir(TDir)) return ec; - path::append(TDir, Twine(Model)); - Model.swap(TDir); + if (makeAbsolute) { + // Make model absolute by prepending a temp directory if it's not already. + bool absolute = path::is_absolute(Twine(Model)); + if (!absolute) { + SmallString<128> TDir; + if (error_code ec = TempDir(TDir)) return ec; + path::append(TDir, Twine(Model)); + Model.swap(TDir); + } } // Replace '%' with random chars. From here on, DO NOT modify model. It may be diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc index 5cdb11c..da440fd 100644 --- a/contrib/llvm/lib/Support/Unix/Process.inc +++ b/contrib/llvm/lib/Support/Unix/Process.inc @@ -293,3 +293,7 @@ const char *Process::OutputBold(bool bg) { const char *Process::ResetColor() { return "\033[0m"; } + +void Process::SetWorkingDirectory(std::string Path) { + ::chdir(Path.c_str()); +} diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc index fc5f580..83da82a 100644 --- a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc +++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc @@ -39,7 +39,7 @@ using namespace sys; //=== and must not be UNIX code. 
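The Twine hunk above replaces the untyped const void* child slot and its per-kind static_casts with a small union that is read according to the stored NodeKind. Reduced to its essentials, the pattern looks like this (a sketch only, not the Twine class itself):

#include <cstdio>

// A value slot that can hold one of several payloads, discriminated by kind.
struct Child {
  enum Kind { CString, Unsigned, Signed, Character } kind;
  union {
    const char *cString;
    unsigned decUI;
    int decI;
    char character;
  };
};

// Each case reads exactly the member that was stored for that kind.
static void printChild(const Child &c) {
  switch (c.kind) {
  case Child::CString:   std::printf("%s", c.cString);   break;
  case Child::Unsigned:  std::printf("%u", c.decUI);     break;
  case Child::Signed:    std::printf("%d", c.decI);      break;
  case Child::Character: std::printf("%c", c.character); break;
  }
}

Storing the payload in a typed union member and switching on the kind keeps every read matched to the corresponding write, which is exactly what the old pointer-casting code made easy to get wrong.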
//===----------------------------------------------------------------------===// -static std::vector<HMODULE> OpenedHandles; +static DenseSet<HMODULE> *OpenedHandles; extern "C" { @@ -63,30 +63,43 @@ extern "C" { #endif stricmp(ModuleName, "msvcrt20") != 0 && stricmp(ModuleName, "msvcrt40") != 0) { - OpenedHandles.push_back((HMODULE)ModuleBase); + OpenedHandles->insert((HMODULE)ModuleBase); } return TRUE; } } -bool DynamicLibrary::LoadLibraryPermanently(const char *filename, - std::string *ErrMsg) { - if (filename) { - HMODULE a_handle = LoadLibrary(filename); +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, + std::string *errMsg) { + SmartScopedLock<true> lock(getMutex()); - if (a_handle == 0) - return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : "); + if (!filename) { + // When no file is specified, enumerate all DLLs and EXEs in the process. + if (OpenedHandles == 0) + OpenedHandles = new DenseSet<HMODULE>(); - OpenedHandles.push_back(a_handle); - } else { - // When no file is specified, enumerate all DLLs and EXEs in the - // process. EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0); + // Dummy library that represents "search all handles". + // This is mostly to ensure that the return value still shows up as "valid". + return DynamicLibrary(&OpenedHandles); } + + HMODULE a_handle = LoadLibrary(filename); - // Because we don't remember the handle, we will never free it; hence, - // it is loaded permanently. - return false; + if (a_handle == 0) { + MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); + return DynamicLibrary(); + } + + if (OpenedHandles == 0) + OpenedHandles = new DenseSet<HMODULE>(); + + // If we've already loaded this library, FreeLibrary() the handle in order to + // keep the internal refcount at +1. + if (!OpenedHandles->insert(a_handle).second) + FreeLibrary(a_handle); + + return DynamicLibrary(a_handle); } // Stack probing routines are in the support library (e.g. libgcc), but we don't @@ -101,21 +114,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename, #undef EXPLICIT_SYMBOL2 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { + SmartScopedLock<true> Lock(getMutex()); + // First check symbols added via AddSymbol(). if (ExplicitSymbols) { - std::map<std::string, void *>::iterator I = - ExplicitSymbols->find(symbolName); - std::map<std::string, void *>::iterator E = ExplicitSymbols->end(); - if (I != E) - return I->second; + StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName); + + if (i != ExplicitSymbols->end()) + return i->second; } // Now search the libraries. 
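getPermanentLibrary above keeps every module handle in one DenseSet and, when LoadLibrary returns a handle that is already tracked, immediately calls FreeLibrary on the duplicate so the module's reference count stays at exactly +1. The same dedupe-and-release idea expressed with POSIX dlopen/dlclose instead of the Win32 calls (illustrative only):

#include <dlfcn.h>
#include <set>

// Track every handle we ever open; if dlopen returns one we already track,
// drop the extra reference so the library's refcount stays at exactly +1.
static std::set<void *> *OpenedHandles;

void *openOnce(const char *path) {
  void *h = ::dlopen(path, RTLD_NOW | RTLD_GLOBAL);
  if (!h)
    return 0;
  if (!OpenedHandles)
    OpenedHandles = new std::set<void *>();
  if (!OpenedHandles->insert(h).second)
    ::dlclose(h);  // already tracked: release the duplicate reference
  return h;
}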
- for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(), - E = OpenedHandles.end(); I != E; ++I) { - FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); - if (ptr) { - return (void *)(intptr_t)ptr; + if (OpenedHandles) { + for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(), + E = OpenedHandles->end(); I != E; ++I) { + FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); + if (ptr) { + return (void *)(intptr_t)ptr; + } } } @@ -134,4 +150,14 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { return 0; } + +void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { + if (!isValid()) + return NULL; + if (Data == &OpenedHandles) + return SearchForAddressOfSymbol(symbolName); + return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName); +} + + } diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc index 9f69e73..fcc7283 100644 --- a/contrib/llvm/lib/Support/Windows/Memory.inc +++ b/contrib/llvm/lib/Support/Windows/Memory.inc @@ -32,11 +32,16 @@ MemoryBlock Memory::AllocateRWX(size_t NumBytes, static const size_t pageSize = Process::GetPageSize(); size_t NumPages = (NumBytes+pageSize-1)/pageSize; - //FIXME: support NearBlock if ever needed on Win64. + PVOID start = NearBlock ? static_cast<unsigned char *>(NearBlock->base()) + + NearBlock->size() : NULL; - void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT, + void *pa = VirtualAlloc(start, NumPages*pageSize, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); if (pa == NULL) { + if (NearBlock) { + // Try again without the NearBlock hint + return AllocateRWX(NumBytes, NULL, ErrMsg); + } MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: "); return MemoryBlock(); } @@ -54,20 +59,62 @@ bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { return false; } +static DWORD getProtection(const void *addr) { + MEMORY_BASIC_INFORMATION info; + if (sizeof(info) == ::VirtualQuery(addr, &info, sizeof(info))) { + return info.Protect; + } + return 0; +} + bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) { + if (!setRangeWritable(M.Address, M.Size)) { + return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: "); + } return true; } bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) { - return false; + if (!setRangeExecutable(M.Address, M.Size)) { + return MakeErrMsg(ErrMsg, "Cannot set memory to executable: "); + } + return true; } bool Memory::setRangeWritable(const void *Addr, size_t Size) { - return true; + DWORD prot = getProtection(Addr); + if (!prot) + return false; + + if (prot == PAGE_EXECUTE || prot == PAGE_EXECUTE_READ) { + prot = PAGE_EXECUTE_READWRITE; + } else if (prot == PAGE_NOACCESS || prot == PAGE_READONLY) { + prot = PAGE_READWRITE; + } + + DWORD oldProt; + sys::Memory::InvalidateInstructionCache(Addr, Size); + return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt) + == TRUE; } bool Memory::setRangeExecutable(const void *Addr, size_t Size) { - return false; + DWORD prot = getProtection(Addr); + if (!prot) + return false; + + if (prot == PAGE_NOACCESS) { + prot = PAGE_EXECUTE; + } else if (prot == PAGE_READONLY) { + prot = PAGE_EXECUTE_READ; + } else if (prot == PAGE_READWRITE) { + prot = PAGE_EXECUTE_READWRITE; + } + + DWORD oldProt; + sys::Memory::InvalidateInstructionCache(Addr, Size); + return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt) + == TRUE; } } diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc 
index af71b73..bc597b2 100644 --- a/contrib/llvm/lib/Support/Windows/PathV2.inc +++ b/contrib/llvm/lib/Support/Windows/PathV2.inc @@ -445,13 +445,35 @@ error_code file_size(const Twine &path, uint64_t &result) { return success; } +static bool isReservedName(StringRef path) { + // This list of reserved names comes from MSDN, at: + // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx + static const char *sReservedNames[] = { "nul", "con", "prn", "aux", + "com1", "com2", "com3", "com4", "com5", "com6", + "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", + "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" }; + + // First, check to see if this is a device namespace, which always + // starts with \\.\, since device namespaces are not legal file paths. + if (path.startswith("\\\\.\\")) + return true; + + // Then compare against the list of ancient reserved names + for (size_t i = 0; i < sizeof(sReservedNames) / sizeof(const char *); ++i) { + if (path.equals_lower(sReservedNames[i])) + return true; + } + + // The path isn't what we consider reserved. + return false; +} + error_code status(const Twine &path, file_status &result) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; StringRef path8 = path.toStringRef(path_storage); - // FIXME: We should detect as many "special file name" as possible. - if (path8.compare_lower("nul") == 0) { + if (isReservedName(path8)) { result = file_status(file_type::character_file); return success; } @@ -501,7 +523,8 @@ handle_status_error: } error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl<char> &result_path) { + SmallVectorImpl<char> &result_path, + bool makeAbsolute) { // Use result_path as temp storage. result_path.set_size(0); StringRef m = model.toStringRef(result_path); @@ -509,17 +532,19 @@ error_code unique_file(const Twine &model, int &result_fd, SmallVector<wchar_t, 128> model_utf16; if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec; - // Make model absolute by prepending a temp directory if it's not already. - bool absolute = path::is_absolute(m); - - if (!absolute) { - SmallVector<wchar_t, 64> temp_dir; - if (error_code ec = TempDir(temp_dir)) return ec; - // Handle c: by removing it. - if (model_utf16.size() > 2 && model_utf16[1] == L':') { - model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2); + if (makeAbsolute) { + // Make model absolute by prepending a temp directory if it's not already. + bool absolute = path::is_absolute(m); + + if (!absolute) { + SmallVector<wchar_t, 64> temp_dir; + if (error_code ec = TempDir(temp_dir)) return ec; + // Handle c: by removing it. + if (model_utf16.size() > 2 && model_utf16[1] == L':') { + model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2); + } + model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end()); } - model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end()); } // Replace '%' with random chars. From here on, DO NOT modify model. 
It may be diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc index 06a7f00..fe54eb1 100644 --- a/contrib/llvm/lib/Support/Windows/Process.inc +++ b/contrib/llvm/lib/Support/Windows/Process.inc @@ -15,6 +15,7 @@ #include <psapi.h> #include <malloc.h> #include <io.h> +#include <direct.h> #ifdef __MINGW32__ #if (HAVE_LIBPSAPI != 1) @@ -219,4 +220,8 @@ const char *Process::ResetColor() { return 0; } +void Process::SetWorkingDirectory(std::string Path) { + ::_chdir(Path.c_str()); +} + } diff --git a/contrib/llvm/lib/Support/Windows/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc index 471f8fa..26b9bba 100644 --- a/contrib/llvm/lib/Support/Windows/RWMutex.inc +++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc @@ -18,39 +18,115 @@ #include "Windows.h" -// FIXME: Windows does not have reader-writer locks pre-Vista. If you want -// real reader-writer locks, you a threads implementation for Windows. - namespace llvm { using namespace sys; +// Windows has slim read-writer lock support on Vista and higher, so we +// will attempt to load the APIs. If they exist, we will use them, and +// if not, we will fall back on critical sections. When we drop support +// for XP, we can stop lazy-loading these APIs and just use them directly. +#if defined(__MINGW32__) + // Taken from WinNT.h + typedef struct _RTL_SRWLOCK { + PVOID Ptr; + } RTL_SRWLOCK, *PRTL_SRWLOCK; + + // Taken from WinBase.h + typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK; +#endif + +static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL; +static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL; +static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL; +static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL; +static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL; + +static bool sHasSRW = false; + +static bool loadSRW() { + static bool sChecked = false; + if (!sChecked) { + sChecked = true; + + HMODULE hLib = ::LoadLibrary(TEXT("Kernel32")); + if (hLib) { + fpInitializeSRWLock = + (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, + "InitializeSRWLock"); + fpAcquireSRWLockExclusive = + (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, + "AcquireSRWLockExclusive"); + fpAcquireSRWLockShared = + (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, + "AcquireSRWLockShared"); + fpReleaseSRWLockExclusive = + (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, + "ReleaseSRWLockExclusive"); + fpReleaseSRWLockShared = + (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, + "ReleaseSRWLockShared"); + ::FreeLibrary(hLib); + + if (fpInitializeSRWLock != NULL) { + sHasSRW = true; + } + } + } + return sHasSRW; +} + RWMutexImpl::RWMutexImpl() { - data_ = calloc(1, sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + if (loadSRW()) { + data_ = calloc(1, sizeof(SRWLOCK)); + fpInitializeSRWLock(static_cast<PSRWLOCK>(data_)); + } else { + data_ = calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + } } RWMutexImpl::~RWMutexImpl() { - DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); - free(data_); + if (sHasSRW) { + // Nothing to do in the case of slim reader/writers + } else { + DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + free(data_); + } } bool RWMutexImpl::reader_acquire() { - EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + if (sHasSRW) { + fpAcquireSRWLockShared(static_cast<PSRWLOCK>(data_)); + } else { 
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + } return true; } bool RWMutexImpl::reader_release() { - LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + if (sHasSRW) { + fpReleaseSRWLockShared(static_cast<PSRWLOCK>(data_)); + } else { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + } return true; } bool RWMutexImpl::writer_acquire() { - EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + if (sHasSRW) { + fpAcquireSRWLockExclusive(static_cast<PSRWLOCK>(data_)); + } else { + EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + } return true; } bool RWMutexImpl::writer_release() { - LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + if (sHasSRW) { + fpReleaseSRWLockExclusive(static_cast<PSRWLOCK>(data_)); + } else { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + } return true; } diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index 14f3f21..0d4b8a2 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -23,14 +23,133 @@ #endif #include <psapi.h> -#ifdef __MINGW32__ +#ifdef _MSC_VER + #pragma comment(lib, "psapi.lib") + #pragma comment(lib, "dbghelp.lib") +#elif __MINGW32__ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1)) #error "libimagehlp.a & libpsapi.a should be present" #endif -#else - #pragma comment(lib, "psapi.lib") - #pragma comment(lib, "dbghelp.lib") -#endif + // The version of g++ that comes with MinGW does *not* properly understand + // the ll format specifier for printf. However, MinGW passes the format + // specifiers on to the MSVCRT entirely, and the CRT understands the ll + // specifier. So these warnings are spurious in this case. Since we compile + // with -Wall, this will generate these warnings which should be ignored. So + // we will turn off the warnings for this just file. However, MinGW also does + // not support push and pop for diagnostics, so we have to manually turn it + // back on at the end of the file. + #pragma GCC diagnostic ignored "-Wformat" + #pragma GCC diagnostic ignored "-Wformat-extra-args" + + #if !defined(__MINGW64_VERSION_MAJOR) + // MinGW.org does not have updated support for the 64-bit versions of the + // DebugHlp APIs. So we will have to load them manually. The structures and + // method signatures were pulled from DbgHelp.h in the Windows Platform SDK, + // and adjusted for brevity. 
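The SRW-lock loading above and this DbgHelp block use the same lazy-binding trick: declare a function-pointer type matching the API, LoadLibrary the DLL, GetProcAddress the entry point, and treat a null result as "fall back to the older API". A minimal Windows-only sketch of one such probe (the PVOID parameter is a simplification of the real PSRWLOCK argument):

#include <windows.h>

// Optional Vista+ API, resolved at runtime so the binary still loads on XP.
typedef VOID (WINAPI *InitSRWLockFn)(PVOID);

static InitSRWLockFn probeInitializeSRWLock() {
  HMODULE lib = ::LoadLibraryA("kernel32.dll");
  if (!lib)
    return NULL;
  InitSRWLockFn fn =
      (InitSRWLockFn)::GetProcAddress(lib, "InitializeSRWLock");
  // kernel32 stays mapped for the life of the process, so dropping our
  // extra reference here does not unload anything.
  ::FreeLibrary(lib);
  return fn;  // NULL means the API is unavailable: use the fallback path
}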
+ typedef struct _IMAGEHLP_LINE64 { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD64 Address; + } IMAGEHLP_LINE64, *PIMAGEHLP_LINE64; + + typedef struct _IMAGEHLP_SYMBOL64 { + DWORD SizeOfStruct; + DWORD64 Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL64, *PIMAGEHLP_SYMBOL64; + + typedef struct _tagADDRESS64 { + DWORD64 Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS64, *LPADDRESS64; + + typedef struct _KDHELP64 { + DWORD64 Thread; + DWORD ThCallbackStack; + DWORD ThCallbackBStore; + DWORD NextCallback; + DWORD FramePointer; + DWORD64 KiCallUserMode; + DWORD64 KeUserCallbackDispatcher; + DWORD64 SystemRangeStart; + DWORD64 KiUserExceptionDispatcher; + DWORD64 StackBase; + DWORD64 StackLimit; + DWORD64 Reserved[5]; + } KDHELP64, *PKDHELP64; + + typedef struct _tagSTACKFRAME64 { + ADDRESS64 AddrPC; + ADDRESS64 AddrReturn; + ADDRESS64 AddrFrame; + ADDRESS64 AddrStack; + ADDRESS64 AddrBStore; + PVOID FuncTableEntry; + DWORD64 Params[4]; + BOOL Far; + BOOL Virtual; + DWORD64 Reserved[3]; + KDHELP64 KdHelp; + } STACKFRAME64, *LPSTACKFRAME64; + +typedef BOOL (__stdcall *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess, + DWORD64 qwBaseAddress, PVOID lpBuffer, DWORD nSize, + LPDWORD lpNumberOfBytesRead); + +typedef PVOID (__stdcall *PFUNCTION_TABLE_ACCESS_ROUTINE64)( HANDLE ahProcess, + DWORD64 AddrBase); + +typedef DWORD64 (__stdcall *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess, + DWORD64 Address); + +typedef DWORD64 (__stdcall *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess, + HANDLE hThread, LPADDRESS64 lpaddr); + +typedef BOOL (WINAPI *fpStackWalk64)(DWORD, HANDLE, HANDLE, LPSTACKFRAME64, + PVOID, PREAD_PROCESS_MEMORY_ROUTINE64, + PFUNCTION_TABLE_ACCESS_ROUTINE64, + PGET_MODULE_BASE_ROUTINE64, + PTRANSLATE_ADDRESS_ROUTINE64); +static fpStackWalk64 StackWalk64; + +typedef DWORD64 (WINAPI *fpSymGetModuleBase64)(HANDLE, DWORD64); +static fpSymGetModuleBase64 SymGetModuleBase64; + +typedef BOOL (WINAPI *fpSymGetSymFromAddr64)(HANDLE, DWORD64, + PDWORD64, PIMAGEHLP_SYMBOL64); +static fpSymGetSymFromAddr64 SymGetSymFromAddr64; + +typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64, + PDWORD, PIMAGEHLP_LINE64); +static fpSymGetLineFromAddr64 SymGetLineFromAddr64; + +typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); +static fpSymFunctionTableAccess64 SymFunctionTableAccess64; + +static bool load64BitDebugHelp(void) { + HMODULE hLib = ::LoadLibrary("Dbghelp.dll"); + if (hLib) { + StackWalk64 = (fpStackWalk64) + ::GetProcAddress(hLib, "StackWalk64"); + SymGetModuleBase64 = (fpSymGetModuleBase64) + ::GetProcAddress(hLib, "SymGetModuleBase64"); + SymGetSymFromAddr64 = (fpSymGetSymFromAddr64) + ::GetProcAddress(hLib, "SymGetSymFromAddr64"); + SymGetLineFromAddr64 = (fpSymGetLineFromAddr64) + ::GetProcAddress(hLib, "SymGetLineFromAddr64"); + SymFunctionTableAccess64 = (fpSymFunctionTableAccess64) + ::GetProcAddress(hLib, "SymFunctionTableAccess64"); + } + return StackWalk64 != NULL; +} + #endif // !defined(__MINGW64_VERSION_MAJOR) +#endif // __MINGW32__ // Forward declare. 
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep); @@ -90,6 +209,18 @@ static int CRTReportHook(int ReportType, char *Message, int *Return) { #endif static void RegisterHandler() { +#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR) + // On MinGW.org, we need to load up the symbols explicitly, because the + // Win32 framework they include does not have support for the 64-bit + // versions of the APIs we need. If we cannot load up the APIs (which + // would be unexpected as they should exist on every version of Windows + // we support), we will bail out since there would be nothing to report. + if (!load64BitDebugHelp()) { + assert(false && "These APIs should always be available"); + return; + } +#endif + if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); return; @@ -213,20 +344,28 @@ void llvm::sys::RunInterruptHandlers() { static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { Cleanup(); -#ifdef _WIN64 - // TODO: provide a x64 friendly version of the following -#else - // Initialize the STACKFRAME structure. - STACKFRAME StackFrame; + STACKFRAME64 StackFrame; memset(&StackFrame, 0, sizeof(StackFrame)); + DWORD machineType; +#if defined(_M_X64) + machineType = IMAGE_FILE_MACHINE_AMD64; + StackFrame.AddrPC.Offset = ep->ContextRecord->Rip; + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp; + StackFrame.AddrFrame.Mode = AddrModeFlat; +#elif defined(_M_IX86) + machineType = IMAGE_FILE_MACHINE_I386; StackFrame.AddrPC.Offset = ep->ContextRecord->Eip; StackFrame.AddrPC.Mode = AddrModeFlat; StackFrame.AddrStack.Offset = ep->ContextRecord->Esp; StackFrame.AddrStack.Mode = AddrModeFlat; StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp; StackFrame.AddrFrame.Mode = AddrModeFlat; +#endif HANDLE hProcess = GetCurrentProcess(); HANDLE hThread = GetCurrentThread(); @@ -236,9 +375,9 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { SymInitialize(hProcess, NULL, TRUE); while (true) { - if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame, - ep->ContextRecord, NULL, SymFunctionTableAccess, - SymGetModuleBase, NULL)) { + if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, + ep->ContextRecord, NULL, SymFunctionTableAccess64, + SymGetModuleBase64, NULL)) { break; } @@ -246,54 +385,66 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { break; // Print the PC in hexadecimal. - DWORD PC = StackFrame.AddrPC.Offset; - fprintf(stderr, "%08lX", PC); + DWORD64 PC = StackFrame.AddrPC.Offset; +#if defined(_M_X64) + fprintf(stderr, "0x%016llX", PC); +#elif defined(_M_IX86) + fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC)); +#endif // Print the parameters. Assume there are four. +#if defined(_M_X64) + fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)", + StackFrame.Params[0], + StackFrame.Params[1], + StackFrame.Params[2], + StackFrame.Params[3]); +#elif defined(_M_IX86) fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", - StackFrame.Params[0], - StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]); - + static_cast<DWORD>(StackFrame.Params[0]), + static_cast<DWORD>(StackFrame.Params[1]), + static_cast<DWORD>(StackFrame.Params[2]), + static_cast<DWORD>(StackFrame.Params[3])); +#endif // Verify the PC belongs to a module in this process. 
- if (!SymGetModuleBase(hProcess, PC)) { + if (!SymGetModuleBase64(hProcess, PC)) { fputs(" <unknown module>\n", stderr); continue; } // Print the symbol name. char buffer[512]; - IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer); - memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL)); - symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL); - symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL); + IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer); + memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64)); + symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); + symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64); - DWORD dwDisp; - if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) { + DWORD64 dwDisp; + if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) { fputc('\n', stderr); continue; } buffer[511] = 0; if (dwDisp > 0) - fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp); + fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp); else fprintf(stderr, ", %s", symbol->Name); // Print the source file and line number information. - IMAGEHLP_LINE line; + IMAGEHLP_LINE64 line; + DWORD dwLineDisp; memset(&line, 0, sizeof(line)); line.SizeOfStruct = sizeof(line); - if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) { + if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) { fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber); - if (dwDisp > 0) - fprintf(stderr, "+%04lu byte(s)", dwDisp); + if (dwLineDisp > 0) + fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp); } fputc('\n', stderr); } -#endif - if (ExitOnUnhandledExceptions) _exit(-3); @@ -326,3 +477,12 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) { LeaveCriticalSection(&CriticalSection); return FALSE; } + +#if __MINGW32__ + // We turned these warnings off for this file so that MinGW-g++ doesn't + // complain about the ll format specifiers used. Now we are turning the + // warnings back on. If MinGW starts to support diagnostic stacks, we can + // replace this with a pop. + #pragma GCC diagnostic warning "-Wformat" + #pragma GCC diagnostic warning "-Wformat-extra-args" +#endif diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h index 4a1553b..67b6f01 100644 --- a/contrib/llvm/lib/Support/Windows/Windows.h +++ b/contrib/llvm/lib/Support/Windows/Windows.h @@ -19,9 +19,9 @@ // mingw-w64 tends to define it as 0x0502 in its headers. #undef _WIN32_WINNT -// Require at least Windows 2000 API. -#define _WIN32_WINNT 0x0500 -#define _WIN32_IE 0x0500 // MinGW at it again. +// Require at least Windows XP(5.1) API. +#define _WIN32_WINNT 0x0501 +#define _WIN32_IE 0x0600 // MinGW at it again. 
#define WIN32_LEAN_AND_MEAN #include "llvm/Config/config.h" // Get build system configuration settings diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index 5a71fa3..4927e9a 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -84,7 +84,7 @@ void raw_ostream::SetBuffered() { } void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, - BufferKind Mode) { + BufferKind Mode) { assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || (Mode != Unbuffered && BufferStart && Size)) && "stream must be unbuffered or have at least one byte"); @@ -121,7 +121,8 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) { raw_ostream &raw_ostream::operator<<(long N) { if (N < 0) { *this << '-'; - N = -N; + // Avoid undefined behavior on LONG_MIN with a cast. + N = -(unsigned long)N; } return this->operator<<(static_cast<unsigned long>(N)); @@ -284,7 +285,7 @@ raw_ostream &raw_ostream::write(unsigned char C) { raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // Group exceptional cases into a single branch. - if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) { + if (BUILTIN_EXPECT(size_t(OutBufEnd - OutBufCur) < Size, false)) { if (BUILTIN_EXPECT(!OutBufStart, false)) { if (BufferMode == Unbuffered) { write_impl(Ptr, Size); diff --git a/contrib/llvm/lib/TableGen/Error.cpp b/contrib/llvm/lib/TableGen/Error.cpp new file mode 100644 index 0000000..5b2cbbf --- /dev/null +++ b/contrib/llvm/lib/TableGen/Error.cpp @@ -0,0 +1,39 @@ +//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains error handling helper routines to pretty-print diagnostic +// messages from tblgen. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +SourceMgr SrcMgr; + +void PrintError(SMLoc ErrorLoc, const Twine &Msg) { + SrcMgr.PrintMessage(ErrorLoc, Msg, "error"); +} + +void PrintError(const char *Loc, const Twine &Msg) { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error"); +} + +void PrintError(const Twine &Msg) { + errs() << "error:" << Msg << "\n"; +} + +void PrintError(const TGError &Error) { + PrintError(Error.getLoc(), Error.getMessage()); +} + +} // end namespace llvm diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp new file mode 100644 index 0000000..01bc55e --- /dev/null +++ b/contrib/llvm/lib/TableGen/Main.cpp @@ -0,0 +1,124 @@ +//===- Main.cpp - Top-Level TableGen implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TableGen is a tool which can be used to build up a description of something, +// then invoke one or more "tablegen backends" to emit information about the +// description in some predefined format. 
In practice, this is used by the LLVM +// code generators to automate generation of a code generator through a +// high-level description of the target. +// +//===----------------------------------------------------------------------===// + +#include "TGParser.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/system_error.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenAction.h" +#include <algorithm> +#include <cstdio> +using namespace llvm; + +namespace { + cl::opt<std::string> + OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"), + cl::init("-")); + + cl::opt<std::string> + DependFilename("d", cl::desc("Dependency filename"), cl::value_desc("filename"), + cl::init("")); + + cl::opt<std::string> + InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); + + cl::list<std::string> + IncludeDirs("I", cl::desc("Directory of include files"), + cl::value_desc("directory"), cl::Prefix); +} + +namespace llvm { + +int TableGenMain(char *argv0, TableGenAction &Action) { + RecordKeeper Records; + + try { + // Parse the input file. + OwningPtr<MemoryBuffer> File; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { + errs() << "Could not open input file '" << InputFilename << "': " + << ec.message() <<"\n"; + return 1; + } + MemoryBuffer *F = File.take(); + + // Tell SrcMgr about this buffer, which is what TGParser will pick up. + SrcMgr.AddNewSourceBuffer(F, SMLoc()); + + // Record the location of the include directory so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + TGParser Parser(SrcMgr, Records); + + if (Parser.ParseFile()) + return 1; + + std::string Error; + tool_output_file Out(OutputFilename.c_str(), Error); + if (!Error.empty()) { + errs() << argv0 << ": error opening " << OutputFilename + << ":" << Error << "\n"; + return 1; + } + if (!DependFilename.empty()) { + if (OutputFilename == "-") { + errs() << argv0 << ": the option -d must be used together with -o\n"; + return 1; + } + tool_output_file DepOut(DependFilename.c_str(), Error); + if (!Error.empty()) { + errs() << argv0 << ": error opening " << DependFilename + << ":" << Error << "\n"; + return 1; + } + DepOut.os() << OutputFilename << ":"; + const std::vector<std::string> &Dependencies = Parser.getDependencies(); + for (std::vector<std::string>::const_iterator I = Dependencies.begin(), + E = Dependencies.end(); + I != E; ++I) { + DepOut.os() << " " << (*I); + } + DepOut.os() << "\n"; + DepOut.keep(); + } + + if (Action(Out.os(), Records)) + return 1; + + // Declare success. + Out.keep(); + return 0; + + } catch (const TGError &Error) { + PrintError(Error); + } catch (const std::string &Error) { + PrintError(Error); + } catch (const char *Error) { + PrintError(Error); + } catch (...) { + errs() << argv0 << ": Unknown unexpected exception occurred.\n"; + } + + return 1; +} + +} diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp new file mode 100644 index 0000000..b7c51ca --- /dev/null +++ b/contrib/llvm/lib/TableGen/Record.cpp @@ -0,0 +1,2019 @@ +//===- Record.cpp - Record implementation ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
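The -d handling in TableGenMain above writes a single make-style rule: the output file, a colon, then every file the parser pulled in, so a build system can re-run tblgen when an included .td changes. That write loop reduced to plain C++ (illustrative; the real code goes through tool_output_file and Parser.getDependencies()):

#include <fstream>
#include <string>
#include <vector>

// Write "output: dep1 dep2 ..." so make/ninja can track .td includes.
static bool writeDepFile(const std::string &depPath, const std::string &output,
                         const std::vector<std::string> &deps) {
  std::ofstream os(depPath.c_str());
  if (!os)
    return false;
  os << output << ":";
  for (size_t i = 0; i != deps.size(); ++i)
    os << " " << deps[i];
  os << "\n";
  return os.good();
}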
+// +//===----------------------------------------------------------------------===// +// +// Implement the tablegen record classes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/Error.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// std::string wrapper for DenseMap purposes +//===----------------------------------------------------------------------===// + +/// TableGenStringKey - This is a wrapper for std::string suitable for +/// using as a key to a DenseMap. Because there isn't a particularly +/// good way to indicate tombstone or empty keys for strings, we want +/// to wrap std::string to indicate that this is a "special" string +/// not expected to take on certain values (those of the tombstone and +/// empty keys). This makes things a little safer as it clarifies +/// that DenseMap is really not appropriate for general strings. + +class TableGenStringKey { +public: + TableGenStringKey(const std::string &str) : data(str) {} + TableGenStringKey(const char *str) : data(str) {} + + const std::string &str() const { return data; } + +private: + std::string data; +}; + +/// Specialize DenseMapInfo for TableGenStringKey. +namespace llvm { + +template<> struct DenseMapInfo<TableGenStringKey> { + static inline TableGenStringKey getEmptyKey() { + TableGenStringKey Empty("<<<EMPTY KEY>>>"); + return Empty; + } + static inline TableGenStringKey getTombstoneKey() { + TableGenStringKey Tombstone("<<<TOMBSTONE KEY>>>"); + return Tombstone; + } + static unsigned getHashValue(const TableGenStringKey& Val) { + return HashString(Val.str()); + } + static bool isEqual(const TableGenStringKey& LHS, + const TableGenStringKey& RHS) { + return LHS.str() == RHS.str(); + } +}; + +} + +//===----------------------------------------------------------------------===// +// Type implementations +//===----------------------------------------------------------------------===// + +BitRecTy BitRecTy::Shared; +IntRecTy IntRecTy::Shared; +StringRecTy StringRecTy::Shared; +CodeRecTy CodeRecTy::Shared; +DagRecTy DagRecTy::Shared; + +void RecTy::dump() const { print(errs()); } + +ListRecTy *RecTy::getListTy() { + if (!ListTy) + ListTy = new ListRecTy(this); + return ListTy; +} + +Init *BitRecTy::convertValue(BitsInit *BI) { + if (BI->getNumBits() != 1) return 0; // Only accept if just one bit! + return BI->getBit(0); +} + +bool BitRecTy::baseClassOf(const BitsRecTy *RHS) const { + return RHS->getNumBits() == 1; +} + +Init *BitRecTy::convertValue(IntInit *II) { + int64_t Val = II->getValue(); + if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit! + + return BitInit::get(Val != 0); +} + +Init *BitRecTy::convertValue(TypedInit *VI) { + if (dynamic_cast<BitRecTy*>(VI->getType())) + return VI; // Accept variable if it is already of bit type! 
+ return 0; +} + +BitsRecTy *BitsRecTy::get(unsigned Sz) { + static std::vector<BitsRecTy*> Shared; + if (Sz >= Shared.size()) + Shared.resize(Sz + 1); + BitsRecTy *&Ty = Shared[Sz]; + if (!Ty) + Ty = new BitsRecTy(Sz); + return Ty; +} + +std::string BitsRecTy::getAsString() const { + return "bits<" + utostr(Size) + ">"; +} + +Init *BitsRecTy::convertValue(UnsetInit *UI) { + SmallVector<Init *, 16> NewBits(Size); + + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = UnsetInit::get(); + + return BitsInit::get(NewBits); +} + +Init *BitsRecTy::convertValue(BitInit *UI) { + if (Size != 1) return 0; // Can only convert single bit. + return BitsInit::get(UI); +} + +/// canFitInBitfield - Return true if the number of bits is large enough to hold +/// the integer value. +static bool canFitInBitfield(int64_t Value, unsigned NumBits) { + // For example, with NumBits == 4, we permit Values from [-7 .. 15]. + return (NumBits >= sizeof(Value) * 8) || + (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1); +} + +/// convertValue from Int initializer to bits type: Split the integer up into the +/// appropriate bits. +/// +Init *BitsRecTy::convertValue(IntInit *II) { + int64_t Value = II->getValue(); + // Make sure this bitfield is large enough to hold the integer value. + if (!canFitInBitfield(Value, Size)) + return 0; + + SmallVector<Init *, 16> NewBits(Size); + + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = BitInit::get(Value & (1LL << i)); + + return BitsInit::get(NewBits); +} + +Init *BitsRecTy::convertValue(BitsInit *BI) { + // If the number of bits is right, return it. Otherwise we need to expand or + // truncate. + if (BI->getNumBits() == Size) return BI; + return 0; +} + +Init *BitsRecTy::convertValue(TypedInit *VI) { + if (BitsRecTy *BRT = dynamic_cast<BitsRecTy*>(VI->getType())) + if (BRT->Size == Size) { + SmallVector<Init *, 16> NewBits(Size); + + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = VarBitInit::get(VI, i); + return BitsInit::get(NewBits); + } + + if (Size == 1 && dynamic_cast<BitRecTy*>(VI->getType())) + return BitsInit::get(VI); + + if (TernOpInit *Tern = dynamic_cast<TernOpInit*>(VI)) { + if (Tern->getOpcode() == TernOpInit::IF) { + Init *LHS = Tern->getLHS(); + Init *MHS = Tern->getMHS(); + Init *RHS = Tern->getRHS(); + + IntInit *MHSi = dynamic_cast<IntInit*>(MHS); + IntInit *RHSi = dynamic_cast<IntInit*>(RHS); + + if (MHSi && RHSi) { + int64_t MHSVal = MHSi->getValue(); + int64_t RHSVal = RHSi->getValue(); + + if (canFitInBitfield(MHSVal, Size) && canFitInBitfield(RHSVal, Size)) { + SmallVector<Init *, 16> NewBits(Size); + + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = + TernOpInit::get(TernOpInit::IF, LHS, + IntInit::get((MHSVal & (1LL << i)) ? 1 : 0), + IntInit::get((RHSVal & (1LL << i)) ? 
1 : 0), + VI->getType()); + + return BitsInit::get(NewBits); + } + } else { + BitsInit *MHSbs = dynamic_cast<BitsInit*>(MHS); + BitsInit *RHSbs = dynamic_cast<BitsInit*>(RHS); + + if (MHSbs && RHSbs) { + SmallVector<Init *, 16> NewBits(Size); + + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = TernOpInit::get(TernOpInit::IF, LHS, + MHSbs->getBit(i), + RHSbs->getBit(i), + VI->getType()); + + return BitsInit::get(NewBits); + } + } + } + } + + return 0; +} + +Init *IntRecTy::convertValue(BitInit *BI) { + return IntInit::get(BI->getValue()); +} + +Init *IntRecTy::convertValue(BitsInit *BI) { + int64_t Result = 0; + for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) + if (BitInit *Bit = dynamic_cast<BitInit*>(BI->getBit(i))) { + Result |= Bit->getValue() << i; + } else { + return 0; + } + return IntInit::get(Result); +} + +Init *IntRecTy::convertValue(TypedInit *TI) { + if (TI->getType()->typeIsConvertibleTo(this)) + return TI; // Accept variable if already of the right type! + return 0; +} + +Init *StringRecTy::convertValue(UnOpInit *BO) { + if (BO->getOpcode() == UnOpInit::CAST) { + Init *L = BO->getOperand()->convertInitializerTo(this); + if (L == 0) return 0; + if (L != BO->getOperand()) + return UnOpInit::get(UnOpInit::CAST, L, new StringRecTy); + return BO; + } + + return convertValue((TypedInit*)BO); +} + +Init *StringRecTy::convertValue(BinOpInit *BO) { + if (BO->getOpcode() == BinOpInit::STRCONCAT) { + Init *L = BO->getLHS()->convertInitializerTo(this); + Init *R = BO->getRHS()->convertInitializerTo(this); + if (L == 0 || R == 0) return 0; + if (L != BO->getLHS() || R != BO->getRHS()) + return BinOpInit::get(BinOpInit::STRCONCAT, L, R, new StringRecTy); + return BO; + } + + return convertValue((TypedInit*)BO); +} + + +Init *StringRecTy::convertValue(TypedInit *TI) { + if (dynamic_cast<StringRecTy*>(TI->getType())) + return TI; // Accept variable if already of the right type! + return 0; +} + +std::string ListRecTy::getAsString() const { + return "list<" + Ty->getAsString() + ">"; +} + +Init *ListRecTy::convertValue(ListInit *LI) { + std::vector<Init*> Elements; + + // Verify that all of the elements of the list are subclasses of the + // appropriate class! + for (unsigned i = 0, e = LI->getSize(); i != e; ++i) + if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty)) + Elements.push_back(CI); + else + return 0; + + ListRecTy *LType = dynamic_cast<ListRecTy*>(LI->getType()); + if (LType == 0) { + return 0; + } + + return ListInit::get(Elements, this); +} + +Init *ListRecTy::convertValue(TypedInit *TI) { + // Ensure that TI is compatible with our class. 
+ if (ListRecTy *LRT = dynamic_cast<ListRecTy*>(TI->getType())) + if (LRT->getElementType()->typeIsConvertibleTo(getElementType())) + return TI; + return 0; +} + +Init *CodeRecTy::convertValue(TypedInit *TI) { + if (TI->getType()->typeIsConvertibleTo(this)) + return TI; + return 0; +} + +Init *DagRecTy::convertValue(TypedInit *TI) { + if (TI->getType()->typeIsConvertibleTo(this)) + return TI; + return 0; +} + +Init *DagRecTy::convertValue(UnOpInit *BO) { + if (BO->getOpcode() == UnOpInit::CAST) { + Init *L = BO->getOperand()->convertInitializerTo(this); + if (L == 0) return 0; + if (L != BO->getOperand()) + return UnOpInit::get(UnOpInit::CAST, L, new DagRecTy); + return BO; + } + return 0; +} + +Init *DagRecTy::convertValue(BinOpInit *BO) { + if (BO->getOpcode() == BinOpInit::CONCAT) { + Init *L = BO->getLHS()->convertInitializerTo(this); + Init *R = BO->getRHS()->convertInitializerTo(this); + if (L == 0 || R == 0) return 0; + if (L != BO->getLHS() || R != BO->getRHS()) + return BinOpInit::get(BinOpInit::CONCAT, L, R, new DagRecTy); + return BO; + } + return 0; +} + +RecordRecTy *RecordRecTy::get(Record *R) { + return &dynamic_cast<RecordRecTy&>(*R->getDefInit()->getType()); +} + +std::string RecordRecTy::getAsString() const { + return Rec->getName(); +} + +Init *RecordRecTy::convertValue(DefInit *DI) { + // Ensure that DI is a subclass of Rec. + if (!DI->getDef()->isSubClassOf(Rec)) + return 0; + return DI; +} + +Init *RecordRecTy::convertValue(TypedInit *TI) { + // Ensure that TI is compatible with Rec. + if (RecordRecTy *RRT = dynamic_cast<RecordRecTy*>(TI->getType())) + if (RRT->getRecord()->isSubClassOf(getRecord()) || + RRT->getRecord() == getRecord()) + return TI; + return 0; +} + +bool RecordRecTy::baseClassOf(const RecordRecTy *RHS) const { + if (Rec == RHS->getRecord() || RHS->getRecord()->isSubClassOf(Rec)) + return true; + + const std::vector<Record*> &SC = Rec->getSuperClasses(); + for (unsigned i = 0, e = SC.size(); i != e; ++i) + if (RHS->getRecord()->isSubClassOf(SC[i])) + return true; + + return false; +} + + +/// resolveTypes - Find a common type that T1 and T2 convert to. +/// Return 0 if no such type exists. 
+/// +RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { + if (!T1->typeIsConvertibleTo(T2)) { + if (!T2->typeIsConvertibleTo(T1)) { + // If one is a Record type, check superclasses + RecordRecTy *RecTy1 = dynamic_cast<RecordRecTy*>(T1); + if (RecTy1) { + // See if T2 inherits from a type T1 also inherits from + const std::vector<Record *> &T1SuperClasses = + RecTy1->getRecord()->getSuperClasses(); + for(std::vector<Record *>::const_iterator i = T1SuperClasses.begin(), + iend = T1SuperClasses.end(); + i != iend; + ++i) { + RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i); + RecTy *NewType1 = resolveTypes(SuperRecTy1, T2); + if (NewType1 != 0) { + if (NewType1 != SuperRecTy1) { + delete SuperRecTy1; + } + return NewType1; + } + } + } + RecordRecTy *RecTy2 = dynamic_cast<RecordRecTy*>(T2); + if (RecTy2) { + // See if T1 inherits from a type T2 also inherits from + const std::vector<Record *> &T2SuperClasses = + RecTy2->getRecord()->getSuperClasses(); + for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(), + iend = T2SuperClasses.end(); + i != iend; + ++i) { + RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i); + RecTy *NewType2 = resolveTypes(T1, SuperRecTy2); + if (NewType2 != 0) { + if (NewType2 != SuperRecTy2) { + delete SuperRecTy2; + } + return NewType2; + } + } + } + return 0; + } + return T2; + } + return T1; +} + + +//===----------------------------------------------------------------------===// +// Initializer implementations +//===----------------------------------------------------------------------===// + +void Init::dump() const { return print(errs()); } + +UnsetInit *UnsetInit::get() { + static UnsetInit TheInit; + return &TheInit; +} + +BitInit *BitInit::get(bool V) { + static BitInit True(true); + static BitInit False(false); + + return V ? &True : &False; +} + +static void +ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) { + ID.AddInteger(Range.size()); + + for (ArrayRef<Init *>::iterator i = Range.begin(), + iend = Range.end(); + i != iend; + ++i) + ID.AddPointer(*i); +} + +BitsInit *BitsInit::get(ArrayRef<Init *> Range) { + typedef FoldingSet<BitsInit> Pool; + static Pool ThePool; + + FoldingSetNodeID ID; + ProfileBitsInit(ID, Range); + + void *IP = 0; + if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) + return I; + + BitsInit *I = new BitsInit(Range); + ThePool.InsertNode(I, IP); + + return I; +} + +void BitsInit::Profile(FoldingSetNodeID &ID) const { + ProfileBitsInit(ID, Bits); +} + +Init * +BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { + SmallVector<Init *, 16> NewBits(Bits.size()); + + for (unsigned i = 0, e = Bits.size(); i != e; ++i) { + if (Bits[i] >= getNumBits()) + return 0; + NewBits[i] = getBit(Bits[i]); + } + return BitsInit::get(NewBits); +} + +std::string BitsInit::getAsString() const { + std::string Result = "{ "; + for (unsigned i = 0, e = getNumBits(); i != e; ++i) { + if (i) Result += ", "; + if (Init *Bit = getBit(e-i-1)) + Result += Bit->getAsString(); + else + Result += "*"; + } + return Result + " }"; +} + +// resolveReferences - If there are any field references that refer to fields +// that have been filled in, we can propagate the values now. 
+// +Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { + bool Changed = false; + SmallVector<Init *, 16> NewBits(getNumBits()); + + for (unsigned i = 0, e = Bits.size(); i != e; ++i) { + Init *B; + Init *CurBit = getBit(i); + + do { + B = CurBit; + CurBit = CurBit->resolveReferences(R, RV); + Changed |= B != CurBit; + } while (B != CurBit); + NewBits[i] = CurBit; + } + + if (Changed) + return BitsInit::get(NewBits); + + return const_cast<BitsInit *>(this); +} + +IntInit *IntInit::get(int64_t V) { + typedef DenseMap<int64_t, IntInit *> Pool; + static Pool ThePool; + + IntInit *&I = ThePool[V]; + if (!I) I = new IntInit(V); + return I; +} + +std::string IntInit::getAsString() const { + return itostr(Value); +} + +Init * +IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { + SmallVector<Init *, 16> NewBits(Bits.size()); + + for (unsigned i = 0, e = Bits.size(); i != e; ++i) { + if (Bits[i] >= 64) + return 0; + + NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i])); + } + return BitsInit::get(NewBits); +} + +StringInit *StringInit::get(const std::string &V) { + typedef StringMap<StringInit *> Pool; + static Pool ThePool; + + StringInit *&I = ThePool[V]; + if (!I) I = new StringInit(V); + return I; +} + +CodeInit *CodeInit::get(const std::string &V) { + typedef StringMap<CodeInit *> Pool; + static Pool ThePool; + + CodeInit *&I = ThePool[V]; + if (!I) I = new CodeInit(V); + return I; +} + +static void ProfileListInit(FoldingSetNodeID &ID, + ArrayRef<Init *> Range, + RecTy *EltTy) { + ID.AddInteger(Range.size()); + ID.AddPointer(EltTy); + + for (ArrayRef<Init *>::iterator i = Range.begin(), + iend = Range.end(); + i != iend; + ++i) + ID.AddPointer(*i); +} + +ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) { + typedef FoldingSet<ListInit> Pool; + static Pool ThePool; + + // Just use the FoldingSetNodeID to compute a hash. Use a DenseMap + // for actual storage. 
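+  // The pool is keyed on the element type and the element pointers, so
+  // structurally identical lists share a single ListInit.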
+ FoldingSetNodeID ID; + ProfileListInit(ID, Range, EltTy); + + void *IP = 0; + if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) + return I; + + ListInit *I = new ListInit(Range, EltTy); + ThePool.InsertNode(I, IP); + return I; +} + +void ListInit::Profile(FoldingSetNodeID &ID) const { + ListRecTy *ListType = dynamic_cast<ListRecTy *>(getType()); + assert(ListType && "Bad type for ListInit!"); + RecTy *EltTy = ListType->getElementType(); + + ProfileListInit(ID, Values, EltTy); +} + +Init * +ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const { + std::vector<Init*> Vals; + for (unsigned i = 0, e = Elements.size(); i != e; ++i) { + if (Elements[i] >= getSize()) + return 0; + Vals.push_back(getElement(Elements[i])); + } + return ListInit::get(Vals, getType()); +} + +Record *ListInit::getElementAsRecord(unsigned i) const { + assert(i < Values.size() && "List element index out of range!"); + DefInit *DI = dynamic_cast<DefInit*>(Values[i]); + if (DI == 0) throw "Expected record in list!"; + return DI->getDef(); +} + +Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const { + std::vector<Init*> Resolved; + Resolved.reserve(getSize()); + bool Changed = false; + + for (unsigned i = 0, e = getSize(); i != e; ++i) { + Init *E; + Init *CurElt = getElement(i); + + do { + E = CurElt; + CurElt = CurElt->resolveReferences(R, RV); + Changed |= E != CurElt; + } while (E != CurElt); + Resolved.push_back(E); + } + + if (Changed) + return ListInit::get(Resolved, getType()); + return const_cast<ListInit *>(this); +} + +Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV, + unsigned Elt) const { + if (Elt >= getSize()) + return 0; // Out of range reference. + Init *E = getElement(Elt); + // If the element is set to some value, or if we are resolving a reference + // to a specific variable and that variable is explicitly unset, then + // replace the VarListElementInit with it. 
+ if (IRV || !dynamic_cast<UnsetInit*>(E)) + return E; + return 0; +} + +std::string ListInit::getAsString() const { + std::string Result = "["; + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + if (i) Result += ", "; + Result += Values[i]->getAsString(); + } + return Result + "]"; +} + +Init *OpInit::resolveBitReference(Record &R, const RecordVal *IRV, + unsigned Bit) const { + Init *Folded = Fold(&R, 0); + + if (Folded != this) { + TypedInit *Typed = dynamic_cast<TypedInit *>(Folded); + if (Typed) { + return Typed->resolveBitReference(R, IRV, Bit); + } + } + + return 0; +} + +Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, + unsigned Elt) const { + Init *Resolved = resolveReferences(R, IRV); + OpInit *OResolved = dynamic_cast<OpInit *>(Resolved); + if (OResolved) { + Resolved = OResolved->Fold(&R, 0); + } + + if (Resolved != this) { + TypedInit *Typed = dynamic_cast<TypedInit *>(Resolved); + assert(Typed && "Expected typed init for list reference"); + if (Typed) { + Init *New = Typed->resolveListElementReference(R, IRV, Elt); + if (New) + return New; + return VarListElementInit::get(Typed, Elt); + } + } + + return 0; +} + +UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) { + typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key; + + typedef DenseMap<Key, UnOpInit *> Pool; + static Pool ThePool; + + Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type)); + + UnOpInit *&I = ThePool[TheKey]; + if (!I) I = new UnOpInit(opc, lhs, Type); + return I; +} + +Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { + switch (getOpcode()) { + default: assert(0 && "Unknown unop"); + case CAST: { + if (getType()->getAsString() == "string") { + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + if (LHSs) { + return LHSs; + } + + DefInit *LHSd = dynamic_cast<DefInit*>(LHS); + if (LHSd) { + return StringInit::get(LHSd->getDef()->getName()); + } + } else { + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + if (LHSs) { + std::string Name = LHSs->getValue(); + + // From TGParser::ParseIDValue + if (CurRec) { + if (const RecordVal *RV = CurRec->getValue(Name)) { + if (RV->getType() != getType()) + throw "type mismatch in cast"; + return VarInit::get(Name, RV->getType()); + } + + std::string TemplateArgName = CurRec->getName()+":"+Name; + if (CurRec->isTemplateArg(TemplateArgName)) { + const RecordVal *RV = CurRec->getValue(TemplateArgName); + assert(RV && "Template arg doesn't exist??"); + + if (RV->getType() != getType()) + throw "type mismatch in cast"; + + return VarInit::get(TemplateArgName, RV->getType()); + } + } + + if (CurMultiClass) { + std::string MCName = CurMultiClass->Rec.getName()+"::"+Name; + if (CurMultiClass->Rec.isTemplateArg(MCName)) { + const RecordVal *RV = CurMultiClass->Rec.getValue(MCName); + assert(RV && "Template arg doesn't exist??"); + + if (RV->getType() != getType()) + throw "type mismatch in cast"; + + return VarInit::get(MCName, RV->getType()); + } + } + + if (Record *D = (CurRec->getRecords()).getDef(Name)) + return DefInit::get(D); + + throw TGError(CurRec->getLoc(), "Undefined reference:'" + Name + "'\n"); + } + } + break; + } + case HEAD: { + ListInit *LHSl = dynamic_cast<ListInit*>(LHS); + if (LHSl) { + if (LHSl->getSize() == 0) { + assert(0 && "Empty list in car"); + return 0; + } + return LHSl->getElement(0); + } + break; + } + case TAIL: { + ListInit *LHSl = dynamic_cast<ListInit*>(LHS); + if (LHSl) { + if (LHSl->getSize() == 0) { + assert(0 && "Empty list in cdr"); + return 0; + } + // 
Note the +1. We can't just pass the result of getValues() + // directly. + ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1; + ArrayRef<Init *>::iterator end = LHSl->getValues().end(); + ListInit *Result = + ListInit::get(ArrayRef<Init *>(begin, end - begin), + LHSl->getType()); + return Result; + } + break; + } + case EMPTY: { + ListInit *LHSl = dynamic_cast<ListInit*>(LHS); + if (LHSl) { + if (LHSl->getSize() == 0) { + return IntInit::get(1); + } else { + return IntInit::get(0); + } + } + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + if (LHSs) { + if (LHSs->getValue().empty()) { + return IntInit::get(1); + } else { + return IntInit::get(0); + } + } + + break; + } + } + return const_cast<UnOpInit *>(this); +} + +Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const { + Init *lhs = LHS->resolveReferences(R, RV); + + if (LHS != lhs) + return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, 0); + return Fold(&R, 0); +} + +std::string UnOpInit::getAsString() const { + std::string Result; + switch (Opc) { + case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break; + case HEAD: Result = "!head"; break; + case TAIL: Result = "!tail"; break; + case EMPTY: Result = "!empty"; break; + } + return Result + "(" + LHS->getAsString() + ")"; +} + +BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs, + Init *rhs, RecTy *Type) { + typedef std::pair< + std::pair<std::pair<unsigned, Init *>, Init *>, + RecTy * + > Key; + + typedef DenseMap<Key, BinOpInit *> Pool; + static Pool ThePool; + + Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs), + Type)); + + BinOpInit *&I = ThePool[TheKey]; + if (!I) I = new BinOpInit(opc, lhs, rhs, Type); + return I; +} + +Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { + switch (getOpcode()) { + default: assert(0 && "Unknown binop"); + case CONCAT: { + DagInit *LHSs = dynamic_cast<DagInit*>(LHS); + DagInit *RHSs = dynamic_cast<DagInit*>(RHS); + if (LHSs && RHSs) { + DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator()); + DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator()); + if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef()) + throw "Concated Dag operators do not match!"; + std::vector<Init*> Args; + std::vector<std::string> ArgNames; + for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) { + Args.push_back(LHSs->getArg(i)); + ArgNames.push_back(LHSs->getArgName(i)); + } + for (unsigned i = 0, e = RHSs->getNumArgs(); i != e; ++i) { + Args.push_back(RHSs->getArg(i)); + ArgNames.push_back(RHSs->getArgName(i)); + } + return DagInit::get(LHSs->getOperator(), "", Args, ArgNames); + } + break; + } + case STRCONCAT: { + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + if (LHSs && RHSs) + return StringInit::get(LHSs->getValue() + RHSs->getValue()); + break; + } + case EQ: { + // try to fold eq comparison for 'bit' and 'int', otherwise fallback + // to string objects. 
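+    // For example, !eq(1, 1) and !eq("a", "a") fold to 1 and !eq("a", "b")
+    // folds to 0; operands that are still unresolved leave the !eq in place.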
+ IntInit* L = + dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get())); + IntInit* R = + dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get())); + + if (L && R) + return IntInit::get(L->getValue() == R->getValue()); + + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + + // Make sure we've resolved + if (LHSs && RHSs) + return IntInit::get(LHSs->getValue() == RHSs->getValue()); + + break; + } + case SHL: + case SRA: + case SRL: { + IntInit *LHSi = dynamic_cast<IntInit*>(LHS); + IntInit *RHSi = dynamic_cast<IntInit*>(RHS); + if (LHSi && RHSi) { + int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue(); + int64_t Result; + switch (getOpcode()) { + default: assert(0 && "Bad opcode!"); + case SHL: Result = LHSv << RHSv; break; + case SRA: Result = LHSv >> RHSv; break; + case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break; + } + return IntInit::get(Result); + } + break; + } + } + return const_cast<BinOpInit *>(this); +} + +Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const { + Init *lhs = LHS->resolveReferences(R, RV); + Init *rhs = RHS->resolveReferences(R, RV); + + if (LHS != lhs || RHS != rhs) + return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R, 0); + return Fold(&R, 0); +} + +std::string BinOpInit::getAsString() const { + std::string Result; + switch (Opc) { + case CONCAT: Result = "!con"; break; + case SHL: Result = "!shl"; break; + case SRA: Result = "!sra"; break; + case SRL: Result = "!srl"; break; + case EQ: Result = "!eq"; break; + case STRCONCAT: Result = "!strconcat"; break; + } + return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")"; +} + +TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs, + Init *mhs, Init *rhs, + RecTy *Type) { + typedef std::pair< + std::pair< + std::pair<std::pair<unsigned, RecTy *>, Init *>, + Init * + >, + Init * + > Key; + + typedef DenseMap<Key, TernOpInit *> Pool; + static Pool ThePool; + + Key TheKey(std::make_pair(std::make_pair(std::make_pair(std::make_pair(opc, + Type), + lhs), + mhs), + rhs)); + + TernOpInit *&I = ThePool[TheKey]; + if (!I) I = new TernOpInit(opc, lhs, mhs, rhs, Type); + return I; +} + +static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, + Record *CurRec, MultiClass *CurMultiClass); + +static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, + RecTy *Type, Record *CurRec, + MultiClass *CurMultiClass) { + std::vector<Init *> NewOperands; + + TypedInit *TArg = dynamic_cast<TypedInit*>(Arg); + + // If this is a dag, recurse + if (TArg && TArg->getType()->getAsString() == "dag") { + Init *Result = ForeachHelper(LHS, Arg, RHSo, Type, + CurRec, CurMultiClass); + if (Result != 0) { + return Result; + } else { + return 0; + } + } + + for (int i = 0; i < RHSo->getNumOperands(); ++i) { + OpInit *RHSoo = dynamic_cast<OpInit*>(RHSo->getOperand(i)); + + if (RHSoo) { + Init *Result = EvaluateOperation(RHSoo, LHS, Arg, + Type, CurRec, CurMultiClass); + if (Result != 0) { + NewOperands.push_back(Result); + } else { + NewOperands.push_back(Arg); + } + } else if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) { + NewOperands.push_back(Arg); + } else { + NewOperands.push_back(RHSo->getOperand(i)); + } + } + + // Now run the operator and use its result as the new leaf + const OpInit *NewOp = RHSo->clone(NewOperands); + Init *NewVal = NewOp->Fold(CurRec, CurMultiClass); + if (NewVal != NewOp) + return NewVal; + + return 0; +} + +static Init *ForeachHelper(Init 
*LHS, Init *MHS, Init *RHS, RecTy *Type, + Record *CurRec, MultiClass *CurMultiClass) { + DagInit *MHSd = dynamic_cast<DagInit*>(MHS); + ListInit *MHSl = dynamic_cast<ListInit*>(MHS); + + DagRecTy *DagType = dynamic_cast<DagRecTy*>(Type); + ListRecTy *ListType = dynamic_cast<ListRecTy*>(Type); + + OpInit *RHSo = dynamic_cast<OpInit*>(RHS); + + if (!RHSo) { + throw TGError(CurRec->getLoc(), "!foreach requires an operator\n"); + } + + TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS); + + if (!LHSt) { + throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n"); + } + + if ((MHSd && DagType) || (MHSl && ListType)) { + if (MHSd) { + Init *Val = MHSd->getOperator(); + Init *Result = EvaluateOperation(RHSo, LHS, Val, + Type, CurRec, CurMultiClass); + if (Result != 0) { + Val = Result; + } + + std::vector<std::pair<Init *, std::string> > args; + for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) { + Init *Arg; + std::string ArgName; + Arg = MHSd->getArg(i); + ArgName = MHSd->getArgName(i); + + // Process args + Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type, + CurRec, CurMultiClass); + if (Result != 0) { + Arg = Result; + } + + // TODO: Process arg names + args.push_back(std::make_pair(Arg, ArgName)); + } + + return DagInit::get(Val, "", args); + } + if (MHSl) { + std::vector<Init *> NewOperands; + std::vector<Init *> NewList(MHSl->begin(), MHSl->end()); + + for (std::vector<Init *>::iterator li = NewList.begin(), + liend = NewList.end(); + li != liend; + ++li) { + Init *Item = *li; + NewOperands.clear(); + for(int i = 0; i < RHSo->getNumOperands(); ++i) { + // First, replace the foreach variable with the list item + if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) { + NewOperands.push_back(Item); + } else { + NewOperands.push_back(RHSo->getOperand(i)); + } + } + + // Now run the operator and use its result as the new list item + const OpInit *NewOp = RHSo->clone(NewOperands); + Init *NewItem = NewOp->Fold(CurRec, CurMultiClass); + if (NewItem != NewOp) + *li = NewItem; + } + return ListInit::get(NewList, MHSl->getType()); + } + } + return 0; +} + +Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { + switch (getOpcode()) { + default: assert(0 && "Unknown binop"); + case SUBST: { + DefInit *LHSd = dynamic_cast<DefInit*>(LHS); + VarInit *LHSv = dynamic_cast<VarInit*>(LHS); + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + + DefInit *MHSd = dynamic_cast<DefInit*>(MHS); + VarInit *MHSv = dynamic_cast<VarInit*>(MHS); + StringInit *MHSs = dynamic_cast<StringInit*>(MHS); + + DefInit *RHSd = dynamic_cast<DefInit*>(RHS); + VarInit *RHSv = dynamic_cast<VarInit*>(RHS); + StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + + if ((LHSd && MHSd && RHSd) + || (LHSv && MHSv && RHSv) + || (LHSs && MHSs && RHSs)) { + if (RHSd) { + Record *Val = RHSd->getDef(); + if (LHSd->getAsString() == RHSd->getAsString()) { + Val = MHSd->getDef(); + } + return DefInit::get(Val); + } + if (RHSv) { + std::string Val = RHSv->getName(); + if (LHSv->getAsString() == RHSv->getAsString()) { + Val = MHSv->getName(); + } + return VarInit::get(Val, getType()); + } + if (RHSs) { + std::string Val = RHSs->getValue(); + + std::string::size_type found; + std::string::size_type idx = 0; + do { + found = Val.find(LHSs->getValue(), idx); + if (found != std::string::npos) { + Val.replace(found, LHSs->getValue().size(), MHSs->getValue()); + } + idx = found + MHSs->getValue().size(); + } while (found != std::string::npos); + + return StringInit::get(Val); + } + } + break; + } + + case 
FOREACH: { + Init *Result = ForeachHelper(LHS, MHS, RHS, getType(), + CurRec, CurMultiClass); + if (Result != 0) { + return Result; + } + break; + } + + case IF: { + IntInit *LHSi = dynamic_cast<IntInit*>(LHS); + if (Init *I = LHS->convertInitializerTo(IntRecTy::get())) + LHSi = dynamic_cast<IntInit*>(I); + if (LHSi) { + if (LHSi->getValue()) { + return MHS; + } else { + return RHS; + } + } + break; + } + } + + return const_cast<TernOpInit *>(this); +} + +Init *TernOpInit::resolveReferences(Record &R, + const RecordVal *RV) const { + Init *lhs = LHS->resolveReferences(R, RV); + + if (Opc == IF && lhs != LHS) { + IntInit *Value = dynamic_cast<IntInit*>(lhs); + if (Init *I = lhs->convertInitializerTo(IntRecTy::get())) + Value = dynamic_cast<IntInit*>(I); + if (Value != 0) { + // Short-circuit + if (Value->getValue()) { + Init *mhs = MHS->resolveReferences(R, RV); + return (TernOpInit::get(getOpcode(), lhs, mhs, + RHS, getType()))->Fold(&R, 0); + } else { + Init *rhs = RHS->resolveReferences(R, RV); + return (TernOpInit::get(getOpcode(), lhs, MHS, + rhs, getType()))->Fold(&R, 0); + } + } + } + + Init *mhs = MHS->resolveReferences(R, RV); + Init *rhs = RHS->resolveReferences(R, RV); + + if (LHS != lhs || MHS != mhs || RHS != rhs) + return (TernOpInit::get(getOpcode(), lhs, mhs, rhs, + getType()))->Fold(&R, 0); + return Fold(&R, 0); +} + +std::string TernOpInit::getAsString() const { + std::string Result; + switch (Opc) { + case SUBST: Result = "!subst"; break; + case FOREACH: Result = "!foreach"; break; + case IF: Result = "!if"; break; + } + return Result + "(" + LHS->getAsString() + ", " + MHS->getAsString() + ", " + + RHS->getAsString() + ")"; +} + +RecTy *TypedInit::getFieldType(const std::string &FieldName) const { + RecordRecTy *RecordType = dynamic_cast<RecordRecTy *>(getType()); + if (RecordType) { + RecordVal *Field = RecordType->getRecord()->getValue(FieldName); + if (Field) { + return Field->getType(); + } + } + return 0; +} + +Init * +TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { + BitsRecTy *T = dynamic_cast<BitsRecTy*>(getType()); + if (T == 0) return 0; // Cannot subscript a non-bits variable. + unsigned NumBits = T->getNumBits(); + + SmallVector<Init *, 16> NewBits(Bits.size()); + for (unsigned i = 0, e = Bits.size(); i != e; ++i) { + if (Bits[i] >= NumBits) + return 0; + + NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), Bits[i]); + } + return BitsInit::get(NewBits); +} + +Init * +TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const { + ListRecTy *T = dynamic_cast<ListRecTy*>(getType()); + if (T == 0) return 0; // Cannot subscript a non-list variable. 
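+  // A single-element slice collapses to one VarListElementInit; longer
+  // slices are rebuilt as a list of per-element references.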
+ + if (Elements.size() == 1) + return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]); + + std::vector<Init*> ListInits; + ListInits.reserve(Elements.size()); + for (unsigned i = 0, e = Elements.size(); i != e; ++i) + ListInits.push_back(VarListElementInit::get(const_cast<TypedInit *>(this), + Elements[i])); + return ListInit::get(ListInits, T); +} + + +VarInit *VarInit::get(const std::string &VN, RecTy *T) { + typedef std::pair<RecTy *, TableGenStringKey> Key; + typedef DenseMap<Key, VarInit *> Pool; + static Pool ThePool; + + Key TheKey(std::make_pair(T, VN)); + + VarInit *&I = ThePool[TheKey]; + if (!I) I = new VarInit(VN, T); + return I; +} + +Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV, + unsigned Bit) const { + if (R.isTemplateArg(getName())) return 0; + if (IRV && IRV->getName() != getName()) return 0; + + RecordVal *RV = R.getValue(getName()); + assert(RV && "Reference to a non-existent variable?"); + assert(dynamic_cast<BitsInit*>(RV->getValue())); + BitsInit *BI = (BitsInit*)RV->getValue(); + + assert(Bit < BI->getNumBits() && "Bit reference out of range!"); + Init *B = BI->getBit(Bit); + + // If the bit is set to some value, or if we are resolving a reference to a + // specific variable and that variable is explicitly unset, then replace the + // VarBitInit with it. + if (IRV || !dynamic_cast<UnsetInit*>(B)) + return B; + return 0; +} + +Init *VarInit::resolveListElementReference(Record &R, + const RecordVal *IRV, + unsigned Elt) const { + if (R.isTemplateArg(getName())) return 0; + if (IRV && IRV->getName() != getName()) return 0; + + RecordVal *RV = R.getValue(getName()); + assert(RV && "Reference to a non-existent variable?"); + ListInit *LI = dynamic_cast<ListInit*>(RV->getValue()); + if (!LI) { + TypedInit *VI = dynamic_cast<TypedInit*>(RV->getValue()); + assert(VI && "Invalid list element!"); + return VarListElementInit::get(VI, Elt); + } + + if (Elt >= LI->getSize()) + return 0; // Out of range reference. + Init *E = LI->getElement(Elt); + // If the element is set to some value, or if we are resolving a reference + // to a specific variable and that variable is explicitly unset, then + // replace the VarListElementInit with it. + if (IRV || !dynamic_cast<UnsetInit*>(E)) + return E; + return 0; +} + + +RecTy *VarInit::getFieldType(const std::string &FieldName) const { + if (RecordRecTy *RTy = dynamic_cast<RecordRecTy*>(getType())) + if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName)) + return RV->getType(); + return 0; +} + +Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, + const std::string &FieldName) const { + if (dynamic_cast<RecordRecTy*>(getType())) + if (const RecordVal *Val = R.getValue(VarName)) { + if (RV != Val && (RV || dynamic_cast<UnsetInit*>(Val->getValue()))) + return 0; + Init *TheInit = Val->getValue(); + assert(TheInit != this && "Infinite loop detected!"); + if (Init *I = TheInit->getFieldInit(R, RV, FieldName)) + return I; + else + return 0; + } + return 0; +} + +/// resolveReferences - This method is used by classes that refer to other +/// variables which may not be defined at the time the expression is formed. +/// If a value is set for the variable later, this method will be called on +/// users of the value to allow the value to propagate out. 
+/// +Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { + if (RecordVal *Val = R.getValue(VarName)) + if (RV == Val || (RV == 0 && !dynamic_cast<UnsetInit*>(Val->getValue()))) + return Val->getValue(); + return const_cast<VarInit *>(this); +} + +VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { + typedef std::pair<TypedInit *, unsigned> Key; + typedef DenseMap<Key, VarBitInit *> Pool; + + static Pool ThePool; + + Key TheKey(std::make_pair(T, B)); + + VarBitInit *&I = ThePool[TheKey]; + if (!I) I = new VarBitInit(T, B); + return I; +} + +std::string VarBitInit::getAsString() const { + return TI->getAsString() + "{" + utostr(Bit) + "}"; +} + +Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { + if (Init *I = getVariable()->resolveBitReference(R, RV, getBitNum())) + return I; + return const_cast<VarBitInit *>(this); +} + +VarListElementInit *VarListElementInit::get(TypedInit *T, + unsigned E) { + typedef std::pair<TypedInit *, unsigned> Key; + typedef DenseMap<Key, VarListElementInit *> Pool; + + static Pool ThePool; + + Key TheKey(std::make_pair(T, E)); + + VarListElementInit *&I = ThePool[TheKey]; + if (!I) I = new VarListElementInit(T, E); + return I; +} + +std::string VarListElementInit::getAsString() const { + return TI->getAsString() + "[" + utostr(Element) + "]"; +} + +Init * +VarListElementInit::resolveReferences(Record &R, const RecordVal *RV) const { + if (Init *I = getVariable()->resolveListElementReference(R, RV, + getElementNum())) + return I; + return const_cast<VarListElementInit *>(this); +} + +Init *VarListElementInit::resolveBitReference(Record &R, const RecordVal *RV, + unsigned Bit) const { + // FIXME: This should be implemented, to support references like: + // bit B = AA[0]{1}; + return 0; +} + +Init *VarListElementInit:: resolveListElementReference(Record &R, + const RecordVal *RV, + unsigned Elt) const { + Init *Result = TI->resolveListElementReference(R, RV, Element); + + if (Result) { + TypedInit *TInit = dynamic_cast<TypedInit *>(Result); + if (TInit) { + Init *Result2 = TInit->resolveListElementReference(R, RV, Elt); + if (Result2) return Result2; + return new VarListElementInit(TInit, Elt); + } + return Result; + } + + return 0; +} + +DefInit *DefInit::get(Record *R) { + return R->getDefInit(); +} + +RecTy *DefInit::getFieldType(const std::string &FieldName) const { + if (const RecordVal *RV = Def->getValue(FieldName)) + return RV->getType(); + return 0; +} + +Init *DefInit::getFieldInit(Record &R, const RecordVal *RV, + const std::string &FieldName) const { + return Def->getValue(FieldName)->getValue(); +} + + +std::string DefInit::getAsString() const { + return Def->getName(); +} + +FieldInit *FieldInit::get(Init *R, const std::string &FN) { + typedef std::pair<Init *, TableGenStringKey> Key; + typedef DenseMap<Key, FieldInit *> Pool; + static Pool ThePool; + + Key TheKey(std::make_pair(R, FN)); + + FieldInit *&I = ThePool[TheKey]; + if (!I) I = new FieldInit(R, FN); + return I; +} + +Init *FieldInit::resolveBitReference(Record &R, const RecordVal *RV, + unsigned Bit) const { + if (Init *BitsVal = Rec->getFieldInit(R, RV, FieldName)) + if (BitsInit *BI = dynamic_cast<BitsInit*>(BitsVal)) { + assert(Bit < BI->getNumBits() && "Bit reference out of range!"); + Init *B = BI->getBit(Bit); + + if (dynamic_cast<BitInit*>(B)) // If the bit is set. + return B; // Replace the VarBitInit with it. 
+ } + return 0; +} + +Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, + unsigned Elt) const { + if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName)) + if (ListInit *LI = dynamic_cast<ListInit*>(ListVal)) { + if (Elt >= LI->getSize()) return 0; + Init *E = LI->getElement(Elt); + + // If the element is set to some value, or if we are resolving a + // reference to a specific variable and that variable is explicitly + // unset, then replace the VarListElementInit with it. + if (RV || !dynamic_cast<UnsetInit*>(E)) + return E; + } + return 0; +} + +Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const { + Init *NewRec = RV ? Rec->resolveReferences(R, RV) : Rec; + + Init *BitsVal = NewRec->getFieldInit(R, RV, FieldName); + if (BitsVal) { + Init *BVR = BitsVal->resolveReferences(R, RV); + return BVR->isComplete() ? BVR : const_cast<FieldInit *>(this); + } + + if (NewRec != Rec) { + return FieldInit::get(NewRec, FieldName); + } + return const_cast<FieldInit *>(this); +} + +void ProfileDagInit(FoldingSetNodeID &ID, + Init *V, + const std::string &VN, + ArrayRef<Init *> ArgRange, + ArrayRef<std::string> NameRange) { + ID.AddPointer(V); + ID.AddString(VN); + + ArrayRef<Init *>::iterator Arg = ArgRange.begin(); + ArrayRef<std::string>::iterator Name = NameRange.begin(); + while (Arg != ArgRange.end()) { + assert(Name != NameRange.end() && "Arg name underflow!"); + ID.AddPointer(*Arg++); + ID.AddString(*Name++); + } + assert(Name == NameRange.end() && "Arg name overflow!"); +} + +DagInit * +DagInit::get(Init *V, const std::string &VN, + ArrayRef<Init *> ArgRange, + ArrayRef<std::string> NameRange) { + typedef FoldingSet<DagInit> Pool; + static Pool ThePool; + + FoldingSetNodeID ID; + ProfileDagInit(ID, V, VN, ArgRange, NameRange); + + void *IP = 0; + if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) + return I; + + DagInit *I = new DagInit(V, VN, ArgRange, NameRange); + ThePool.InsertNode(I, IP); + + return I; +} + +DagInit * +DagInit::get(Init *V, const std::string &VN, + const std::vector<std::pair<Init*, std::string> > &args) { + typedef std::pair<Init*, std::string> PairType; + + std::vector<Init *> Args; + std::vector<std::string> Names; + + for (std::vector<PairType>::const_iterator i = args.begin(), + iend = args.end(); + i != iend; + ++i) { + Args.push_back(i->first); + Names.push_back(i->second); + } + + return DagInit::get(V, VN, Args, Names); +} + +void DagInit::Profile(FoldingSetNodeID &ID) const { + ProfileDagInit(ID, Val, ValName, Args, ArgNames); +} + +Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { + std::vector<Init*> NewArgs; + for (unsigned i = 0, e = Args.size(); i != e; ++i) + NewArgs.push_back(Args[i]->resolveReferences(R, RV)); + + Init *Op = Val->resolveReferences(R, RV); + + if (Args != NewArgs || Op != Val) + return DagInit::get(Op, ValName, NewArgs, ArgNames); + + return const_cast<DagInit *>(this); +} + + +std::string DagInit::getAsString() const { + std::string Result = "(" + Val->getAsString(); + if (!ValName.empty()) + Result += ":" + ValName; + if (Args.size()) { + Result += " " + Args[0]->getAsString(); + if (!ArgNames[0].empty()) Result += ":$" + ArgNames[0]; + for (unsigned i = 1, e = Args.size(); i != e; ++i) { + Result += ", " + Args[i]->getAsString(); + if (!ArgNames[i].empty()) Result += ":$" + ArgNames[i]; + } + } + return Result + ")"; +} + + +//===----------------------------------------------------------------------===// +// Other implementations 
+//===----------------------------------------------------------------------===// + +RecordVal::RecordVal(Init *N, RecTy *T, unsigned P) + : Name(N), Ty(T), Prefix(P) { + Value = Ty->convertValue(UnsetInit::get()); + assert(Value && "Cannot create unset value for current type!"); +} + +RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P) + : Name(StringInit::get(N)), Ty(T), Prefix(P) { + Value = Ty->convertValue(UnsetInit::get()); + assert(Value && "Cannot create unset value for current type!"); +} + +const std::string &RecordVal::getName() const { + StringInit *NameString = dynamic_cast<StringInit *>(Name); + assert(NameString && "RecordVal name is not a string!"); + return NameString->getValue(); +} + +void RecordVal::dump() const { errs() << *this; } + +void RecordVal::print(raw_ostream &OS, bool PrintSem) const { + if (getPrefix()) OS << "field "; + OS << *getType() << " " << getName(); + + if (getValue()) + OS << " = " << *getValue(); + + if (PrintSem) OS << ";\n"; +} + +unsigned Record::LastID = 0; + +void Record::checkName() { + // Ensure the record name has string type. + const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name); + assert(TypedName && "Record name is not typed!"); + RecTy *Type = TypedName->getType(); + if (dynamic_cast<StringRecTy *>(Type) == 0) { + llvm_unreachable("Record name is not a string!"); + } +} + +DefInit *Record::getDefInit() { + if (!TheInit) + TheInit = new DefInit(this, new RecordRecTy(this)); + return TheInit; +} + +const std::string &Record::getName() const { + const StringInit *NameString = + dynamic_cast<const StringInit *>(Name); + assert(NameString && "Record name is not a string!"); + return NameString->getValue(); +} + +void Record::setName(Init *NewName) { + if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) { + TrackedRecords.removeDef(Name->getAsUnquotedString()); + Name = NewName; + TrackedRecords.addDef(this); + } else { + TrackedRecords.removeClass(Name->getAsUnquotedString()); + Name = NewName; + TrackedRecords.addClass(this); + } + checkName(); + // Since the Init for the name was changed, see if we can resolve + // any of it using members of the Record. + Init *ComputedName = Name->resolveReferences(*this, 0); + if (ComputedName != Name) { + setName(ComputedName); + } + // DO NOT resolve record values to the name at this point because + // there might be default values for arguments of this def. Those + // arguments might not have been resolved yet so we don't want to + // prematurely assume values for those arguments were not passed to + // this def. + // + // Nonetheless, it may be that some of this Record's values + // reference the record name. Indeed, the reason for having the + // record name be an Init is to provide this flexibility. The extra + // resolve steps after completely instantiating defs takes care of + // this. See TGParser::ParseDef and TGParser::ParseDefm. +} + +void Record::setName(const std::string &Name) { + setName(StringInit::get(Name)); +} + +/// resolveReferencesTo - If anything in this record refers to RV, replace the +/// reference to RV with the RHS of RV. If RV is null, we resolve all possible +/// references. 
+void Record::resolveReferencesTo(const RecordVal *RV) { + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + if (Init *V = Values[i].getValue()) + Values[i].setValue(V->resolveReferences(*this, RV)); + } +} + +void Record::dump() const { errs() << *this; } + +raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { + OS << R.getName(); + + const std::vector<std::string> &TArgs = R.getTemplateArgs(); + if (!TArgs.empty()) { + OS << "<"; + for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { + if (i) OS << ", "; + const RecordVal *RV = R.getValue(TArgs[i]); + assert(RV && "Template argument record not found??"); + RV->print(OS, false); + } + OS << ">"; + } + + OS << " {"; + const std::vector<Record*> &SC = R.getSuperClasses(); + if (!SC.empty()) { + OS << "\t//"; + for (unsigned i = 0, e = SC.size(); i != e; ++i) + OS << " " << SC[i]->getName(); + } + OS << "\n"; + + const std::vector<RecordVal> &Vals = R.getValues(); + for (unsigned i = 0, e = Vals.size(); i != e; ++i) + if (Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName())) + OS << Vals[i]; + for (unsigned i = 0, e = Vals.size(); i != e; ++i) + if (!Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName())) + OS << Vals[i]; + + return OS << "}\n"; +} + +/// getValueInit - Return the initializer for a value with the specified name, +/// or throw an exception if the field does not exist. +/// +Init *Record::getValueInit(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + return R->getValue(); +} + + +/// getValueAsString - This method looks up the specified field and returns its +/// value as a string, throwing an exception if the field does not exist or if +/// the value is not a string. +/// +std::string Record::getValueAsString(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (StringInit *SI = dynamic_cast<StringInit*>(R->getValue())) + return SI->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a string initializer!"; +} + +/// getValueAsBitsInit - This method looks up the specified field and returns +/// its value as a BitsInit, throwing an exception if the field does not exist +/// or if the value is not the right type. +/// +BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue())) + return BI; + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a BitsInit initializer!"; +} + +/// getValueAsListInit - This method looks up the specified field and returns +/// its value as a ListInit, throwing an exception if the field does not exist +/// or if the value is not the right type. 
+/// +ListInit *Record::getValueAsListInit(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue())) + return LI; + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a list initializer!"; +} + +/// getValueAsListOfDefs - This method looks up the specified field and returns +/// its value as a vector of records, throwing an exception if the field does +/// not exist or if the value is not the right type. +/// +std::vector<Record*> +Record::getValueAsListOfDefs(StringRef FieldName) const { + ListInit *List = getValueAsListInit(FieldName); + std::vector<Record*> Defs; + for (unsigned i = 0; i < List->getSize(); i++) { + if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) { + Defs.push_back(DI->getDef()); + } else { + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' list is not entirely DefInit!"; + } + } + return Defs; +} + +/// getValueAsInt - This method looks up the specified field and returns its +/// value as an int64_t, throwing an exception if the field does not exist or if +/// the value is not the right type. +/// +int64_t Record::getValueAsInt(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (IntInit *II = dynamic_cast<IntInit*>(R->getValue())) + return II->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have an int initializer!"; +} + +/// getValueAsListOfInts - This method looks up the specified field and returns +/// its value as a vector of integers, throwing an exception if the field does +/// not exist or if the value is not the right type. +/// +std::vector<int64_t> +Record::getValueAsListOfInts(StringRef FieldName) const { + ListInit *List = getValueAsListInit(FieldName); + std::vector<int64_t> Ints; + for (unsigned i = 0; i < List->getSize(); i++) { + if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) { + Ints.push_back(II->getValue()); + } else { + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a list of ints initializer!"; + } + } + return Ints; +} + +/// getValueAsListOfStrings - This method looks up the specified field and +/// returns its value as a vector of strings, throwing an exception if the +/// field does not exist or if the value is not the right type. +/// +std::vector<std::string> +Record::getValueAsListOfStrings(StringRef FieldName) const { + ListInit *List = getValueAsListInit(FieldName); + std::vector<std::string> Strings; + for (unsigned i = 0; i < List->getSize(); i++) { + if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) { + Strings.push_back(II->getValue()); + } else { + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a list of strings initializer!"; + } + } + return Strings; +} + +/// getValueAsDef - This method looks up the specified field and returns its +/// value as a Record, throwing an exception if the field does not exist or if +/// the value is not the right type. 
+/// +Record *Record::getValueAsDef(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue())) + return DI->getDef(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a def initializer!"; +} + +/// getValueAsBit - This method looks up the specified field and returns its +/// value as a bit, throwing an exception if the field does not exist or if +/// the value is not the right type. +/// +bool Record::getValueAsBit(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue())) + return BI->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a bit initializer!"; +} + +/// getValueAsDag - This method looks up the specified field and returns its +/// value as an Dag, throwing an exception if the field does not exist or if +/// the value is not the right type. +/// +DagInit *Record::getValueAsDag(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue())) + return DI; + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a dag initializer!"; +} + +std::string Record::getValueAsCode(StringRef FieldName) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (CodeInit *CI = dynamic_cast<CodeInit*>(R->getValue())) + return CI->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a code initializer!"; +} + + +void MultiClass::dump() const { + errs() << "Record:\n"; + Rec.dump(); + + errs() << "Defs:\n"; + for (RecordVector::const_iterator r = DefPrototypes.begin(), + rend = DefPrototypes.end(); + r != rend; + ++r) { + (*r)->dump(); + } +} + + +void RecordKeeper::dump() const { errs() << *this; } + +raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) { + OS << "------------- Classes -----------------\n"; + const std::map<std::string, Record*> &Classes = RK.getClasses(); + for (std::map<std::string, Record*>::const_iterator I = Classes.begin(), + E = Classes.end(); I != E; ++I) + OS << "class " << *I->second; + + OS << "------------- Defs -----------------\n"; + const std::map<std::string, Record*> &Defs = RK.getDefs(); + for (std::map<std::string, Record*>::const_iterator I = Defs.begin(), + E = Defs.end(); I != E; ++I) + OS << "def " << *I->second; + return OS; +} + + +/// getAllDerivedDefinitions - This method returns all concrete definitions +/// that derive from the specified class name. If a class with the specified +/// name does not exist, an error is printed and true is returned. 
+std::vector<Record*> +RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const { + Record *Class = getClass(ClassName); + if (!Class) + throw "ERROR: Couldn't find the `" + ClassName + "' class!\n"; + + std::vector<Record*> Defs; + for (std::map<std::string, Record*>::const_iterator I = getDefs().begin(), + E = getDefs().end(); I != E; ++I) + if (I->second->isSubClassOf(Class)) + Defs.push_back(I->second); + + return Defs; +} + diff --git a/contrib/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm/lib/TableGen/TGLexer.cpp new file mode 100644 index 0000000..8c1b429 --- /dev/null +++ b/contrib/llvm/lib/TableGen/TGLexer.cpp @@ -0,0 +1,435 @@ +//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Lexer for TableGen. +// +//===----------------------------------------------------------------------===// + +#include "TGLexer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Config/config.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cerrno> +using namespace llvm; + +TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + TokStart = 0; +} + +SMLoc TGLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return tgtok::Error. +tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { + PrintError(Loc, Msg); + return tgtok::Error; +} + +int TGLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); + return getNextChar(); + } + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } + case '\n': + case '\r': + // Handle the newline character by ignoring it and incrementing the line + // count. However, be careful about 'dos style' files with \n\r in them. + // Only treat a \n\r or \r\n as a single line. + if ((*CurPtr == '\n' || (*CurPtr == '\r')) && + *CurPtr != CurChar) + ++CurPtr; // Eat the two char newline sequence. + return '\n'; + } +} + +tgtok::TokKind TGLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle letters: [a-zA-Z_#] + if (isalpha(CurChar) || CurChar == '_' || CurChar == '#') + return LexIdentifier(); + + // Unknown character, emit an error. 
+ return ReturnError(TokStart, "Unexpected character"); + case EOF: return tgtok::Eof; + case ':': return tgtok::colon; + case ';': return tgtok::semi; + case '.': return tgtok::period; + case ',': return tgtok::comma; + case '<': return tgtok::less; + case '>': return tgtok::greater; + case ']': return tgtok::r_square; + case '{': return tgtok::l_brace; + case '}': return tgtok::r_brace; + case '(': return tgtok::l_paren; + case ')': return tgtok::r_paren; + case '=': return tgtok::equal; + case '?': return tgtok::question; + + case 0: + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return LexToken(); + case '/': + // If this is the start of a // comment, skip until the end of the line or + // the end of the buffer. + if (*CurPtr == '/') + SkipBCPLComment(); + else if (*CurPtr == '*') { + if (SkipCComment()) + return tgtok::Error; + } else // Otherwise, this is an error. + return ReturnError(TokStart, "Unexpected character"); + return LexToken(); + case '-': case '+': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + return LexNumber(); + case '"': return LexString(); + case '$': return LexVarName(); + case '[': return LexBracket(); + case '!': return LexExclaim(); + } +} + +/// LexString - Lex "[^"]*" +tgtok::TokKind TGLexer::LexString() { + const char *StrStart = CurPtr; + + CurStrVal = ""; + + while (*CurPtr != '"') { + // If we hit the end of the buffer, report an error. + if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + + if (*CurPtr == '\n' || *CurPtr == '\r') + return ReturnError(StrStart, "End of line in string literal"); + + if (*CurPtr != '\\') { + CurStrVal += *CurPtr++; + continue; + } + + ++CurPtr; + + switch (*CurPtr) { + case '\\': case '\'': case '"': + // These turn into their literal character. + CurStrVal += *CurPtr++; + break; + case 't': + CurStrVal += '\t'; + ++CurPtr; + break; + case 'n': + CurStrVal += '\n'; + ++CurPtr; + break; + + case '\n': + case '\r': + return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); + + // If we hit the end of the buffer, report an error. + case '\0': + if (CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + // FALL THROUGH + default: + return ReturnError(CurPtr, "invalid escape in string literal"); + } + } + + ++CurPtr; + return tgtok::StrVal; +} + +tgtok::TokKind TGLexer::LexVarName() { + if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') + return ReturnError(TokStart, "Invalid variable name"); + + // Otherwise, we're ok, consume the rest of the characters. + const char *VarNameStart = CurPtr++; + + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') + ++CurPtr; + + CurStrVal.assign(VarNameStart, CurPtr); + return tgtok::VarName; +} + + +tgtok::TokKind TGLexer::LexIdentifier() { + // The first letter is [a-zA-Z_#]. + const char *IdentStart = TokStart; + + // Match the rest of the identifier regex: [0-9a-zA-Z_#]* + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' || + *CurPtr == '#') + ++CurPtr; + + // Check to see if this identifier is a keyword. 
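+  // "include" is handled specially before the keyword switch: it pulls the
+  // named file into the lexer and returns the first token from that file.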
+ StringRef Str(IdentStart, CurPtr-IdentStart); + + if (Str == "include") { + if (LexInclude()) return tgtok::Error; + return Lex(); + } + + tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) + .Case("int", tgtok::Int) + .Case("bit", tgtok::Bit) + .Case("bits", tgtok::Bits) + .Case("string", tgtok::String) + .Case("list", tgtok::List) + .Case("code", tgtok::Code) + .Case("dag", tgtok::Dag) + .Case("class", tgtok::Class) + .Case("def", tgtok::Def) + .Case("defm", tgtok::Defm) + .Case("multiclass", tgtok::MultiClass) + .Case("field", tgtok::Field) + .Case("let", tgtok::Let) + .Case("in", tgtok::In) + .Default(tgtok::Id); + + if (Kind == tgtok::Id) + CurStrVal.assign(Str.begin(), Str.end()); + return Kind; +} + +/// LexInclude - We just read the "include" token. Get the string token that +/// comes next and enter the include. +bool TGLexer::LexInclude() { + // The token after the include must be a string. + tgtok::TokKind Tok = LexToken(); + if (Tok == tgtok::Error) return true; + if (Tok != tgtok::StrVal) { + PrintError(getLoc(), "Expected filename after include"); + return true; + } + + // Get the string. + std::string Filename = CurStrVal; + std::string IncludedFile; + + + CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), + IncludedFile); + if (CurBuffer == -1) { + PrintError(getLoc(), "Could not find include file '" + Filename + "'"); + return true; + } + + Dependencies.push_back(IncludedFile); + // Save the line number and lex buffer of the includer. + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + return false; +} + +void TGLexer::SkipBCPLComment() { + ++CurPtr; // skip the second slash. + while (1) { + switch (*CurPtr) { + case '\n': + case '\r': + return; // Newline is end of comment. + case 0: + // If this is the end of the buffer, end the comment. + if (CurPtr == CurBuf->getBufferEnd()) + return; + break; + } + // Otherwise, skip the character. + ++CurPtr; + } +} + +/// SkipCComment - This skips C-style /**/ comments. The only difference from C +/// is that we allow nesting. +bool TGLexer::SkipCComment() { + ++CurPtr; // skip the star. + unsigned CommentDepth = 1; + + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + PrintError(TokStart, "Unterminated comment!"); + return true; + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + if (--CommentDepth == 0) + return false; + break; + case '/': + // Start of a nested comment? + if (CurPtr[0] != '*') break; + ++CurPtr; + ++CommentDepth; + break; + } + } +} + +/// LexNumber - Lex: +/// [-+]?[0-9]+ +/// 0x[0-9a-fA-F]+ +/// 0b[01]+ +tgtok::TokKind TGLexer::LexNumber() { + if (CurPtr[-1] == '0') { + if (CurPtr[0] == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. 
+ if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(TokStart, "Hexadecimal number out of range"); + } + return tgtok::IntVal; + } else if (CurPtr[0] == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return tgtok::IntVal; + } + } + + // Check for a sign without a digit. + if (!isdigit(CurPtr[0])) { + if (CurPtr[-1] == '-') + return tgtok::minus; + else if (CurPtr[-1] == '+') + return tgtok::plus; + } + + while (isdigit(CurPtr[0])) + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 10); + return tgtok::IntVal; +} + +/// LexBracket - We just read '['. If this is a code block, return it, +/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' +tgtok::TokKind TGLexer::LexBracket() { + if (CurPtr[0] != '{') + return tgtok::l_square; + ++CurPtr; + const char *CodeStart = CurPtr; + while (1) { + int Char = getNextChar(); + if (Char == EOF) break; + + if (Char != '}') continue; + + Char = getNextChar(); + if (Char == EOF) break; + if (Char == ']') { + CurStrVal.assign(CodeStart, CurPtr-2); + return tgtok::CodeFragment; + } + } + + return ReturnError(CodeStart-2, "Unterminated Code Block"); +} + +/// LexExclaim - Lex '!' and '![a-zA-Z]+'. +tgtok::TokKind TGLexer::LexExclaim() { + if (!isalpha(*CurPtr)) + return ReturnError(CurPtr - 1, "Invalid \"!operator\""); + + const char *Start = CurPtr++; + while (isalpha(*CurPtr)) + ++CurPtr; + + // Check to see which operator this is. + tgtok::TokKind Kind = + StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) + .Case("eq", tgtok::XEq) + .Case("if", tgtok::XIf) + .Case("head", tgtok::XHead) + .Case("tail", tgtok::XTail) + .Case("con", tgtok::XConcat) + .Case("shl", tgtok::XSHL) + .Case("sra", tgtok::XSRA) + .Case("srl", tgtok::XSRL) + .Case("cast", tgtok::XCast) + .Case("empty", tgtok::XEmpty) + .Case("subst", tgtok::XSubst) + .Case("foreach", tgtok::XForEach) + .Case("strconcat", tgtok::XStrConcat) + .Default(tgtok::Error); + + return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); +} + diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h new file mode 100644 index 0000000..84d328b --- /dev/null +++ b/contrib/llvm/lib/TableGen/TGLexer.h @@ -0,0 +1,125 @@ +//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the Lexer for tablegen files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TGLEXER_H +#define TGLEXER_H + +#include "llvm/Support/DataTypes.h" +#include <string> +#include <vector> +#include <cassert> + +namespace llvm { +class MemoryBuffer; +class SourceMgr; +class SMLoc; +class Twine; + +namespace tgtok { + enum TokKind { + // Markers + Eof, Error, + + // Tokens with no info. + minus, plus, // - + + l_square, r_square, // [ ] + l_brace, r_brace, // { } + l_paren, r_paren, // ( ) + less, greater, // < > + colon, semi, // : ; + comma, period, // , . + equal, question, // = ? + + // Keywords. + Bit, Bits, Class, Code, Dag, Def, Defm, Field, In, Int, Let, List, + MultiClass, String, + + // !keywords. + XConcat, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst, + XForEach, XHead, XTail, XEmpty, XIf, XEq, + + // Integer value. + IntVal, + + // String valued tokens. + Id, StrVal, VarName, CodeFragment + }; +} + +/// TGLexer - TableGen Lexer class. +class TGLexer { + SourceMgr &SrcMgr; + + const char *CurPtr; + const MemoryBuffer *CurBuf; + + // Information about the current token. + const char *TokStart; + tgtok::TokKind CurCode; + std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT + int64_t CurIntVal; // This is valid for INTVAL. + + /// CurBuffer - This is the current buffer index we're lexing from as managed + /// by the SourceMgr object. + int CurBuffer; + /// Dependencies - This is the list of all included files. + std::vector<std::string> Dependencies; + +public: + TGLexer(SourceMgr &SrcMgr); + ~TGLexer() {} + + tgtok::TokKind Lex() { + return CurCode = LexToken(); + } + + const std::vector<std::string> &getDependencies() const { + return Dependencies; + } + + tgtok::TokKind getCode() const { return CurCode; } + + const std::string &getCurStrVal() const { + assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal || + CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) && + "This token doesn't have a string value"); + return CurStrVal; + } + int64_t getCurIntVal() const { + assert(CurCode == tgtok::IntVal && "This token isn't an integer"); + return CurIntVal; + } + + SMLoc getLoc() const; + +private: + /// LexToken - Read the next token and return its code. + tgtok::TokKind LexToken(); + + tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg); + + int getNextChar(); + void SkipBCPLComment(); + bool SkipCComment(); + tgtok::TokKind LexIdentifier(); + bool LexInclude(); + tgtok::TokKind LexString(); + tgtok::TokKind LexVarName(); + tgtok::TokKind LexNumber(); + tgtok::TokKind LexBracket(); + tgtok::TokKind LexExclaim(); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp new file mode 100644 index 0000000..e7f00ba --- /dev/null +++ b/contrib/llvm/lib/TableGen/TGParser.cpp @@ -0,0 +1,2194 @@ +//===- TGParser.cpp - Parser for TableGen Files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Parser for TableGen. 
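For a sense of how source text maps onto the tgtok kinds declared above, here is a hypothetical one-line class declaration and the token stream it lexes to (the name "Foo" is invented):

    class Foo<int n>;
    // Class  Id  less  Int  Id  greater  semi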
+// +//===----------------------------------------------------------------------===// + +#include "TGParser.h" +#include "llvm/TableGen/Record.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <sstream> +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Support Code for the Semantic Actions. +//===----------------------------------------------------------------------===// + +namespace llvm { +struct SubClassReference { + SMLoc RefLoc; + Record *Rec; + std::vector<Init*> TemplateArgs; + SubClassReference() : Rec(0) {} + + bool isInvalid() const { return Rec == 0; } +}; + +struct SubMultiClassReference { + SMLoc RefLoc; + MultiClass *MC; + std::vector<Init*> TemplateArgs; + SubMultiClassReference() : MC(0) {} + + bool isInvalid() const { return MC == 0; } + void dump() const; +}; + +void SubMultiClassReference::dump() const { + errs() << "Multiclass:\n"; + + MC->dump(); + + errs() << "Template args:\n"; + for (std::vector<Init *>::const_iterator i = TemplateArgs.begin(), + iend = TemplateArgs.end(); + i != iend; + ++i) { + (*i)->dump(); + } +} + +} // end namespace llvm + +bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) { + if (CurRec == 0) + CurRec = &CurMultiClass->Rec; + + if (RecordVal *ERV = CurRec->getValue(RV.getName())) { + // The value already exists in the class, treat this as a set. + if (ERV->setValue(RV.getValue())) + return Error(Loc, "New definition of '" + RV.getName() + "' of type '" + + RV.getType()->getAsString() + "' is incompatible with " + + "previous definition of type '" + + ERV->getType()->getAsString() + "'"); + } else { + CurRec->addValue(RV); + } + return false; +} + +/// SetValue - +/// Return true on error, false on success. +bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName, + const std::vector<unsigned> &BitList, Init *V) { + if (!V) return false; + + if (CurRec == 0) CurRec = &CurMultiClass->Rec; + + RecordVal *RV = CurRec->getValue(ValName); + if (RV == 0) + return Error(Loc, "Value '" + ValName + "' unknown!"); + + // Do not allow assignments like 'X = X'. This will just cause infinite loops + // in the resolution machinery. + if (BitList.empty()) + if (VarInit *VI = dynamic_cast<VarInit*>(V)) + if (VI->getName() == ValName) + return false; + + // If we are assigning to a subset of the bits in the value... then we must be + // assigning to a field of BitsRecTy, which must have a BitsInit + // initializer. + // + if (!BitList.empty()) { + BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue()); + if (CurVal == 0) + return Error(Loc, "Value '" + ValName + "' is not a bits type"); + + // Convert the incoming value to a bits type of the appropriate size... + Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size())); + if (BI == 0) { + V->convertInitializerTo(BitsRecTy::get(BitList.size())); + return Error(Loc, "Initializer is not compatible with bit range"); + } + + // We should have a BitsInit type now. + BitsInit *BInit = dynamic_cast<BitsInit*>(BI); + assert(BInit != 0); + + SmallVector<Init *, 16> NewBits(CurVal->getNumBits()); + + // Loop over bits, assigning values as appropriate. 
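The bit-list path of SetValue shown here, together with the bit-assignment loop that follows, is what supports partial assignments to a bits field. A hypothetical sketch of the statement form it handles ("Enc" and "Inst" are invented names):

    class Enc {
      bits<8> Inst;
      let Inst{7-4} = 0b1001;   // sets only bits 7..4; the remaining bits keep their value
    }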
+ for (unsigned i = 0, e = BitList.size(); i != e; ++i) { + unsigned Bit = BitList[i]; + if (NewBits[Bit]) + return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" + + ValName + "' more than once"); + NewBits[Bit] = BInit->getBit(i); + } + + for (unsigned i = 0, e = CurVal->getNumBits(); i != e; ++i) + if (NewBits[i] == 0) + NewBits[i] = CurVal->getBit(i); + + V = BitsInit::get(NewBits); + } + + if (RV->setValue(V)) + return Error(Loc, "Value '" + ValName + "' of type '" + + RV->getType()->getAsString() + + "' is incompatible with initializer '" + V->getAsString() +"'"); + return false; +} + +/// AddSubClass - Add SubClass as a subclass to CurRec, resolving its template +/// args as SubClass's template arguments. +bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { + Record *SC = SubClass.Rec; + // Add all of the values in the subclass into the current class. + const std::vector<RecordVal> &Vals = SC->getValues(); + for (unsigned i = 0, e = Vals.size(); i != e; ++i) + if (AddValue(CurRec, SubClass.RefLoc, Vals[i])) + return true; + + const std::vector<std::string> &TArgs = SC->getTemplateArgs(); + + // Ensure that an appropriate number of template arguments are specified. + if (TArgs.size() < SubClass.TemplateArgs.size()) + return Error(SubClass.RefLoc, "More template args specified than expected"); + + // Loop over all of the template arguments, setting them to the specified + // value or leaving them as the default if necessary. + for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { + if (i < SubClass.TemplateArgs.size()) { + // If a value is specified for this template arg, set it now. + if (SetValue(CurRec, SubClass.RefLoc, TArgs[i], std::vector<unsigned>(), + SubClass.TemplateArgs[i])) + return true; + + // Resolve it next. + CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i])); + + // Now remove it. + CurRec->removeValue(TArgs[i]); + + } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) { + return Error(SubClass.RefLoc,"Value not specified for template argument #" + + utostr(i) + " (" + TArgs[i] + ") of subclass '" + + SC->getName() + "'!"); + } + } + + // Since everything went well, we can now set the "superclass" list for the + // current record. + const std::vector<Record*> &SCs = SC->getSuperClasses(); + for (unsigned i = 0, e = SCs.size(); i != e; ++i) { + if (CurRec->isSubClassOf(SCs[i])) + return Error(SubClass.RefLoc, + "Already subclass of '" + SCs[i]->getName() + "'!\n"); + CurRec->addSuperClass(SCs[i]); + } + + if (CurRec->isSubClassOf(SC)) + return Error(SubClass.RefLoc, + "Already subclass of '" + SC->getName() + "'!\n"); + CurRec->addSuperClass(SC); + return false; +} + +/// AddSubMultiClass - Add SubMultiClass as a subclass to +/// CurMC, resolving its template args as SubMultiClass's +/// template arguments. +bool TGParser::AddSubMultiClass(MultiClass *CurMC, + SubMultiClassReference &SubMultiClass) { + MultiClass *SMC = SubMultiClass.MC; + Record *CurRec = &CurMC->Rec; + + const std::vector<RecordVal> &MCVals = CurRec->getValues(); + + // Add all of the values in the subclass into the current class. + const std::vector<RecordVal> &SMCVals = SMC->Rec.getValues(); + for (unsigned i = 0, e = SMCVals.size(); i != e; ++i) + if (AddValue(CurRec, SubMultiClass.RefLoc, SMCVals[i])) + return true; + + int newDefStart = CurMC->DefPrototypes.size(); + + // Add all of the defs in the subclass into the current multiclass. 
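AddSubClass, completed above, is what binds template arguments when a record derives from a class, then resolves references to them and removes them. A hypothetical example ("Reg" and "R0" are invented names):

    class Reg<int num> {
      int Number = num;
    }
    def R0 : Reg<0>;   // num is set to 0, references to it are resolved, then the arg is removed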
+ for (MultiClass::RecordVector::const_iterator i = SMC->DefPrototypes.begin(), + iend = SMC->DefPrototypes.end(); + i != iend; + ++i) { + // Clone the def and add it to the current multiclass + Record *NewDef = new Record(**i); + + // Add all of the values in the superclass into the current def. + for (unsigned i = 0, e = MCVals.size(); i != e; ++i) + if (AddValue(NewDef, SubMultiClass.RefLoc, MCVals[i])) + return true; + + CurMC->DefPrototypes.push_back(NewDef); + } + + const std::vector<std::string> &SMCTArgs = SMC->Rec.getTemplateArgs(); + + // Ensure that an appropriate number of template arguments are + // specified. + if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size()) + return Error(SubMultiClass.RefLoc, + "More template args specified than expected"); + + // Loop over all of the template arguments, setting them to the specified + // value or leaving them as the default if necessary. + for (unsigned i = 0, e = SMCTArgs.size(); i != e; ++i) { + if (i < SubMultiClass.TemplateArgs.size()) { + // If a value is specified for this template arg, set it in the + // superclass now. + if (SetValue(CurRec, SubMultiClass.RefLoc, SMCTArgs[i], + std::vector<unsigned>(), + SubMultiClass.TemplateArgs[i])) + return true; + + // Resolve it next. + CurRec->resolveReferencesTo(CurRec->getValue(SMCTArgs[i])); + + // Now remove it. + CurRec->removeValue(SMCTArgs[i]); + + // If a value is specified for this template arg, set it in the + // new defs now. + for (MultiClass::RecordVector::iterator j = + CurMC->DefPrototypes.begin() + newDefStart, + jend = CurMC->DefPrototypes.end(); + j != jend; + ++j) { + Record *Def = *j; + + if (SetValue(Def, SubMultiClass.RefLoc, SMCTArgs[i], + std::vector<unsigned>(), + SubMultiClass.TemplateArgs[i])) + return true; + + // Resolve it next. + Def->resolveReferencesTo(Def->getValue(SMCTArgs[i])); + + // Now remove it + Def->removeValue(SMCTArgs[i]); + } + } else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) { + return Error(SubMultiClass.RefLoc, + "Value not specified for template argument #" + + utostr(i) + " (" + SMCTArgs[i] + ") of subclass '" + + SMC->Rec.getName() + "'!"); + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Parser Code +//===----------------------------------------------------------------------===// + +/// isObjectStart - Return true if this is a valid first token for an Object. +static bool isObjectStart(tgtok::TokKind K) { + return K == tgtok::Class || K == tgtok::Def || + K == tgtok::Defm || K == tgtok::Let || K == tgtok::MultiClass; +} + +static std::string GetNewAnonymousName() { + static unsigned AnonCounter = 0; + return "anonymous."+utostr(AnonCounter++); +} + +/// ParseObjectName - If an object name is specified, return it. Otherwise, +/// return an anonymous name. +/// ObjectName ::= ID +/// ObjectName ::= /*empty*/ +/// +std::string TGParser::ParseObjectName() { + if (Lex.getCode() != tgtok::Id) + return GetNewAnonymousName(); + + std::string Ret = Lex.getCurStrVal(); + Lex.Lex(); + return Ret; +} + + +/// ParseClassID - Parse and resolve a reference to a class name. This returns +/// null on error. 
+/// +/// ClassID ::= ID +/// +Record *TGParser::ParseClassID() { + if (Lex.getCode() != tgtok::Id) { + TokError("expected name for ClassID"); + return 0; + } + + Record *Result = Records.getClass(Lex.getCurStrVal()); + if (Result == 0) + TokError("Couldn't find class '" + Lex.getCurStrVal() + "'"); + + Lex.Lex(); + return Result; +} + +/// ParseMultiClassID - Parse and resolve a reference to a multiclass name. +/// This returns null on error. +/// +/// MultiClassID ::= ID +/// +MultiClass *TGParser::ParseMultiClassID() { + if (Lex.getCode() != tgtok::Id) { + TokError("expected name for ClassID"); + return 0; + } + + MultiClass *Result = MultiClasses[Lex.getCurStrVal()]; + if (Result == 0) + TokError("Couldn't find class '" + Lex.getCurStrVal() + "'"); + + Lex.Lex(); + return Result; +} + +Record *TGParser::ParseDefmID() { + if (Lex.getCode() != tgtok::Id) { + TokError("expected multiclass name"); + return 0; + } + + MultiClass *MC = MultiClasses[Lex.getCurStrVal()]; + if (MC == 0) { + TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'"); + return 0; + } + + Lex.Lex(); + return &MC->Rec; +} + + +/// ParseSubClassReference - Parse a reference to a subclass or to a templated +/// subclass. This returns a SubClassRefTy with a null Record* on error. +/// +/// SubClassRef ::= ClassID +/// SubClassRef ::= ClassID '<' ValueList '>' +/// +SubClassReference TGParser:: +ParseSubClassReference(Record *CurRec, bool isDefm) { + SubClassReference Result; + Result.RefLoc = Lex.getLoc(); + + if (isDefm) + Result.Rec = ParseDefmID(); + else + Result.Rec = ParseClassID(); + if (Result.Rec == 0) return Result; + + // If there is no template arg list, we're done. + if (Lex.getCode() != tgtok::less) + return Result; + Lex.Lex(); // Eat the '<' + + if (Lex.getCode() == tgtok::greater) { + TokError("subclass reference requires a non-empty list of template values"); + Result.Rec = 0; + return Result; + } + + Result.TemplateArgs = ParseValueList(CurRec, Result.Rec); + if (Result.TemplateArgs.empty()) { + Result.Rec = 0; // Error parsing value list. + return Result; + } + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' in template value list"); + Result.Rec = 0; + return Result; + } + Lex.Lex(); + + return Result; +} + +/// ParseSubMultiClassReference - Parse a reference to a subclass or to a +/// templated submulticlass. This returns a SubMultiClassRefTy with a null +/// Record* on error. +/// +/// SubMultiClassRef ::= MultiClassID +/// SubMultiClassRef ::= MultiClassID '<' ValueList '>' +/// +SubMultiClassReference TGParser:: +ParseSubMultiClassReference(MultiClass *CurMC) { + SubMultiClassReference Result; + Result.RefLoc = Lex.getLoc(); + + Result.MC = ParseMultiClassID(); + if (Result.MC == 0) return Result; + + // If there is no template arg list, we're done. + if (Lex.getCode() != tgtok::less) + return Result; + Lex.Lex(); // Eat the '<' + + if (Lex.getCode() == tgtok::greater) { + TokError("subclass reference requires a non-empty list of template values"); + Result.MC = 0; + return Result; + } + + Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec); + if (Result.TemplateArgs.empty()) { + Result.MC = 0; // Error parsing value list. + return Result; + } + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' in template value list"); + Result.MC = 0; + return Result; + } + Lex.Lex(); + + return Result; +} + +/// ParseRangePiece - Parse a bit/value range. 
+/// RangePiece ::= INTVAL +/// RangePiece ::= INTVAL '-' INTVAL +/// RangePiece ::= INTVAL INTVAL +bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) { + if (Lex.getCode() != tgtok::IntVal) { + TokError("expected integer or bitrange"); + return true; + } + int64_t Start = Lex.getCurIntVal(); + int64_t End; + + if (Start < 0) + return TokError("invalid range, cannot be negative"); + + switch (Lex.Lex()) { // eat first character. + default: + Ranges.push_back(Start); + return false; + case tgtok::minus: + if (Lex.Lex() != tgtok::IntVal) { + TokError("expected integer value as end of range"); + return true; + } + End = Lex.getCurIntVal(); + break; + case tgtok::IntVal: + End = -Lex.getCurIntVal(); + break; + } + if (End < 0) + return TokError("invalid range, cannot be negative"); + Lex.Lex(); + + // Add to the range. + if (Start < End) { + for (; Start <= End; ++Start) + Ranges.push_back(Start); + } else { + for (; Start >= End; --Start) + Ranges.push_back(Start); + } + return false; +} + +/// ParseRangeList - Parse a list of scalars and ranges into scalar values. +/// +/// RangeList ::= RangePiece (',' RangePiece)* +/// +std::vector<unsigned> TGParser::ParseRangeList() { + std::vector<unsigned> Result; + + // Parse the first piece. + if (ParseRangePiece(Result)) + return std::vector<unsigned>(); + while (Lex.getCode() == tgtok::comma) { + Lex.Lex(); // Eat the comma. + + // Parse the next range piece. + if (ParseRangePiece(Result)) + return std::vector<unsigned>(); + } + return Result; +} + +/// ParseOptionalRangeList - Parse either a range list in <>'s or nothing. +/// OptionalRangeList ::= '<' RangeList '>' +/// OptionalRangeList ::= /*empty*/ +bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) { + if (Lex.getCode() != tgtok::less) + return false; + + SMLoc StartLoc = Lex.getLoc(); + Lex.Lex(); // eat the '<' + + // Parse the range list. + Ranges = ParseRangeList(); + if (Ranges.empty()) return true; + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' at end of range list"); + return Error(StartLoc, "to match this '<'"); + } + Lex.Lex(); // eat the '>'. + return false; +} + +/// ParseOptionalBitList - Parse either a bit list in {}'s or nothing. +/// OptionalBitList ::= '{' RangeList '}' +/// OptionalBitList ::= /*empty*/ +bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) { + if (Lex.getCode() != tgtok::l_brace) + return false; + + SMLoc StartLoc = Lex.getLoc(); + Lex.Lex(); // eat the '{' + + // Parse the range list. + Ranges = ParseRangeList(); + if (Ranges.empty()) return true; + + if (Lex.getCode() != tgtok::r_brace) { + TokError("expected '}' at end of bit list"); + return Error(StartLoc, "to match this '{'"); + } + Lex.Lex(); // eat the '}'. + return false; +} + + +/// ParseType - Parse and return a tblgen type. This returns null on error. 
+/// +/// Type ::= STRING // string type +/// Type ::= BIT // bit type +/// Type ::= BITS '<' INTVAL '>' // bits<x> type +/// Type ::= INT // int type +/// Type ::= LIST '<' Type '>' // list<x> type +/// Type ::= CODE // code type +/// Type ::= DAG // dag type +/// Type ::= ClassID // Record Type +/// +RecTy *TGParser::ParseType() { + switch (Lex.getCode()) { + default: TokError("Unknown token when expecting a type"); return 0; + case tgtok::String: Lex.Lex(); return StringRecTy::get(); + case tgtok::Bit: Lex.Lex(); return BitRecTy::get(); + case tgtok::Int: Lex.Lex(); return IntRecTy::get(); + case tgtok::Code: Lex.Lex(); return CodeRecTy::get(); + case tgtok::Dag: Lex.Lex(); return DagRecTy::get(); + case tgtok::Id: + if (Record *R = ParseClassID()) return RecordRecTy::get(R); + return 0; + case tgtok::Bits: { + if (Lex.Lex() != tgtok::less) { // Eat 'bits' + TokError("expected '<' after bits type"); + return 0; + } + if (Lex.Lex() != tgtok::IntVal) { // Eat '<' + TokError("expected integer in bits<n> type"); + return 0; + } + uint64_t Val = Lex.getCurIntVal(); + if (Lex.Lex() != tgtok::greater) { // Eat count. + TokError("expected '>' at end of bits<n> type"); + return 0; + } + Lex.Lex(); // Eat '>' + return BitsRecTy::get(Val); + } + case tgtok::List: { + if (Lex.Lex() != tgtok::less) { // Eat 'bits' + TokError("expected '<' after list type"); + return 0; + } + Lex.Lex(); // Eat '<' + RecTy *SubType = ParseType(); + if (SubType == 0) return 0; + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' at end of list<ty> type"); + return 0; + } + Lex.Lex(); // Eat '>' + return ListRecTy::get(SubType); + } + } +} + +/// ParseIDValue - Parse an ID as a value and decode what it means. +/// +/// IDValue ::= ID [def local value] +/// IDValue ::= ID [def template arg] +/// IDValue ::= ID [multiclass local value] +/// IDValue ::= ID [multiclass template argument] +/// IDValue ::= ID [def name] +/// +Init *TGParser::ParseIDValue(Record *CurRec) { + assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue"); + std::string Name = Lex.getCurStrVal(); + SMLoc Loc = Lex.getLoc(); + Lex.Lex(); + return ParseIDValue(CurRec, Name, Loc); +} + +/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID +/// has already been read. +Init *TGParser::ParseIDValue(Record *CurRec, + const std::string &Name, SMLoc NameLoc) { + if (CurRec) { + if (const RecordVal *RV = CurRec->getValue(Name)) + return VarInit::get(Name, RV->getType()); + + std::string TemplateArgName = CurRec->getName()+":"+Name; + if (CurMultiClass) + TemplateArgName = CurMultiClass->Rec.getName()+"::"+TemplateArgName; + + if (CurRec->isTemplateArg(TemplateArgName)) { + const RecordVal *RV = CurRec->getValue(TemplateArgName); + assert(RV && "Template arg doesn't exist??"); + return VarInit::get(TemplateArgName, RV->getType()); + } + } + + if (CurMultiClass) { + std::string MCName = CurMultiClass->Rec.getName()+"::"+Name; + if (CurMultiClass->Rec.isTemplateArg(MCName)) { + const RecordVal *RV = CurMultiClass->Rec.getValue(MCName); + assert(RV && "Template arg doesn't exist??"); + return VarInit::get(MCName, RV->getType()); + } + } + + if (Record *D = Records.getDef(Name)) + return DefInit::get(D); + + Error(NameLoc, "Variable not defined: '" + Name + "'"); + return 0; +} + +/// ParseOperation - Parse an operator. This returns null on error. 
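The Type grammar handled by ParseType above covers the following forms; a hypothetical class declaring one field of each built-in type ("Example" is an invented name, and per the ClassID production a class name may also be used as a record type):

    class Example {
      bit       B;
      bits<8>   Op;
      int       N;
      string    S;
      list<int> L;
      code      C;
      dag       D;
    }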
+/// +/// Operation ::= XOperator ['<' Type '>'] '(' Args ')' +/// +Init *TGParser::ParseOperation(Record *CurRec) { + switch (Lex.getCode()) { + default: + TokError("unknown operation"); + return 0; + break; + case tgtok::XHead: + case tgtok::XTail: + case tgtok::XEmpty: + case tgtok::XCast: { // Value ::= !unop '(' Value ')' + UnOpInit::UnaryOp Code; + RecTy *Type = 0; + + switch (Lex.getCode()) { + default: assert(0 && "Unhandled code!"); + case tgtok::XCast: + Lex.Lex(); // eat the operation + Code = UnOpInit::CAST; + + Type = ParseOperatorType(); + + if (Type == 0) { + TokError("did not get type for unary operator"); + return 0; + } + + break; + case tgtok::XHead: + Lex.Lex(); // eat the operation + Code = UnOpInit::HEAD; + break; + case tgtok::XTail: + Lex.Lex(); // eat the operation + Code = UnOpInit::TAIL; + break; + case tgtok::XEmpty: + Lex.Lex(); // eat the operation + Code = UnOpInit::EMPTY; + Type = IntRecTy::get(); + break; + } + if (Lex.getCode() != tgtok::l_paren) { + TokError("expected '(' after unary operator"); + return 0; + } + Lex.Lex(); // eat the '(' + + Init *LHS = ParseValue(CurRec); + if (LHS == 0) return 0; + + if (Code == UnOpInit::HEAD + || Code == UnOpInit::TAIL + || Code == UnOpInit::EMPTY) { + ListInit *LHSl = dynamic_cast<ListInit*>(LHS); + StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS); + if (LHSl == 0 && LHSs == 0 && LHSt == 0) { + TokError("expected list or string type argument in unary operator"); + return 0; + } + if (LHSt) { + ListRecTy *LType = dynamic_cast<ListRecTy*>(LHSt->getType()); + StringRecTy *SType = dynamic_cast<StringRecTy*>(LHSt->getType()); + if (LType == 0 && SType == 0) { + TokError("expected list or string type argumnet in unary operator"); + return 0; + } + } + + if (Code == UnOpInit::HEAD + || Code == UnOpInit::TAIL) { + if (LHSl == 0 && LHSt == 0) { + TokError("expected list type argumnet in unary operator"); + return 0; + } + + if (LHSl && LHSl->getSize() == 0) { + TokError("empty list argument in unary operator"); + return 0; + } + if (LHSl) { + Init *Item = LHSl->getElement(0); + TypedInit *Itemt = dynamic_cast<TypedInit*>(Item); + if (Itemt == 0) { + TokError("untyped list element in unary operator"); + return 0; + } + if (Code == UnOpInit::HEAD) { + Type = Itemt->getType(); + } else { + Type = ListRecTy::get(Itemt->getType()); + } + } else { + assert(LHSt && "expected list type argument in unary operator"); + ListRecTy *LType = dynamic_cast<ListRecTy*>(LHSt->getType()); + if (LType == 0) { + TokError("expected list type argumnet in unary operator"); + return 0; + } + if (Code == UnOpInit::HEAD) { + Type = LType->getElementType(); + } else { + Type = LType; + } + } + } + } + + if (Lex.getCode() != tgtok::r_paren) { + TokError("expected ')' in unary operator"); + return 0; + } + Lex.Lex(); // eat the ')' + return (UnOpInit::get(Code, LHS, Type))->Fold(CurRec, CurMultiClass); + } + + case tgtok::XConcat: + case tgtok::XSRA: + case tgtok::XSRL: + case tgtok::XSHL: + case tgtok::XEq: + case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')' + tgtok::TokKind OpTok = Lex.getCode(); + SMLoc OpLoc = Lex.getLoc(); + Lex.Lex(); // eat the operation + + BinOpInit::BinaryOp Code; + RecTy *Type = 0; + + switch (OpTok) { + default: assert(0 && "Unhandled code!"); + case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break; + case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break; + case tgtok::XSRL: Code = BinOpInit::SRL; Type = 
IntRecTy::get(); break; + case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break; + case tgtok::XEq: Code = BinOpInit::EQ; Type = BitRecTy::get(); break; + case tgtok::XStrConcat: + Code = BinOpInit::STRCONCAT; + Type = StringRecTy::get(); + break; + } + + if (Lex.getCode() != tgtok::l_paren) { + TokError("expected '(' after binary operator"); + return 0; + } + Lex.Lex(); // eat the '(' + + SmallVector<Init*, 2> InitList; + + InitList.push_back(ParseValue(CurRec)); + if (InitList.back() == 0) return 0; + + while (Lex.getCode() == tgtok::comma) { + Lex.Lex(); // eat the ',' + + InitList.push_back(ParseValue(CurRec)); + if (InitList.back() == 0) return 0; + } + + if (Lex.getCode() != tgtok::r_paren) { + TokError("expected ')' in operator"); + return 0; + } + Lex.Lex(); // eat the ')' + + // We allow multiple operands to associative operators like !strconcat as + // shorthand for nesting them. + if (Code == BinOpInit::STRCONCAT) { + while (InitList.size() > 2) { + Init *RHS = InitList.pop_back_val(); + RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type)) + ->Fold(CurRec, CurMultiClass); + InitList.back() = RHS; + } + } + + if (InitList.size() == 2) + return (BinOpInit::get(Code, InitList[0], InitList[1], Type)) + ->Fold(CurRec, CurMultiClass); + + Error(OpLoc, "expected two operands to operator"); + return 0; + } + + case tgtok::XIf: + case tgtok::XForEach: + case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' + TernOpInit::TernaryOp Code; + RecTy *Type = 0; + + tgtok::TokKind LexCode = Lex.getCode(); + Lex.Lex(); // eat the operation + switch (LexCode) { + default: assert(0 && "Unhandled code!"); + case tgtok::XIf: + Code = TernOpInit::IF; + break; + case tgtok::XForEach: + Code = TernOpInit::FOREACH; + break; + case tgtok::XSubst: + Code = TernOpInit::SUBST; + break; + } + if (Lex.getCode() != tgtok::l_paren) { + TokError("expected '(' after ternary operator"); + return 0; + } + Lex.Lex(); // eat the '(' + + Init *LHS = ParseValue(CurRec); + if (LHS == 0) return 0; + + if (Lex.getCode() != tgtok::comma) { + TokError("expected ',' in ternary operator"); + return 0; + } + Lex.Lex(); // eat the ',' + + Init *MHS = ParseValue(CurRec); + if (MHS == 0) return 0; + + if (Lex.getCode() != tgtok::comma) { + TokError("expected ',' in ternary operator"); + return 0; + } + Lex.Lex(); // eat the ',' + + Init *RHS = ParseValue(CurRec); + if (RHS == 0) return 0; + + if (Lex.getCode() != tgtok::r_paren) { + TokError("expected ')' in binary operator"); + return 0; + } + Lex.Lex(); // eat the ')' + + switch (LexCode) { + default: assert(0 && "Unhandled code!"); + case tgtok::XIf: { + // FIXME: The `!if' operator doesn't handle non-TypedInit well at + // all. This can be made much more robust. 
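Hypothetical uses of the unary/binary/ternary operators parsed above ("Calc" is an invented name); note that extra !strconcat operands are folded into nested applications by the loop shown earlier:

    class Calc<int n> {
      int Doubled = !if(!eq(n, 0), n, !shl(n, 1));   // ternary !if over binary !eq / !shl
      string Tag  = !strconcat("v", "_", "tag");     // three operands, folded right to left
    }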
+ TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS); + TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS); + + RecTy *MHSTy = 0; + RecTy *RHSTy = 0; + + if (MHSt == 0 && RHSt == 0) { + BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS); + BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS); + + if (MHSbits && RHSbits && + MHSbits->getNumBits() == RHSbits->getNumBits()) { + Type = BitRecTy::get(); + break; + } else { + BitInit *MHSbit = dynamic_cast<BitInit*>(MHS); + BitInit *RHSbit = dynamic_cast<BitInit*>(RHS); + + if (MHSbit && RHSbit) { + Type = BitRecTy::get(); + break; + } + } + } else if (MHSt != 0 && RHSt != 0) { + MHSTy = MHSt->getType(); + RHSTy = RHSt->getType(); + } + + if (!MHSTy || !RHSTy) { + TokError("could not get type for !if"); + return 0; + } + + if (MHSTy->typeIsConvertibleTo(RHSTy)) { + Type = RHSTy; + } else if (RHSTy->typeIsConvertibleTo(MHSTy)) { + Type = MHSTy; + } else { + TokError("inconsistent types for !if"); + return 0; + } + break; + } + case tgtok::XForEach: { + TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS); + if (MHSt == 0) { + TokError("could not get type for !foreach"); + return 0; + } + Type = MHSt->getType(); + break; + } + case tgtok::XSubst: { + TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS); + if (RHSt == 0) { + TokError("could not get type for !subst"); + return 0; + } + Type = RHSt->getType(); + break; + } + } + return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec, + CurMultiClass); + } + } + TokError("could not parse operation"); + return 0; +} + +/// ParseOperatorType - Parse a type for an operator. This returns +/// null on error. +/// +/// OperatorType ::= '<' Type '>' +/// +RecTy *TGParser::ParseOperatorType() { + RecTy *Type = 0; + + if (Lex.getCode() != tgtok::less) { + TokError("expected type name for operator"); + return 0; + } + Lex.Lex(); // eat the < + + Type = ParseType(); + + if (Type == 0) { + TokError("expected type name for operator"); + return 0; + } + + if (Lex.getCode() != tgtok::greater) { + TokError("expected type name for operator"); + return 0; + } + Lex.Lex(); // eat the > + + return Type; +} + + +/// ParseSimpleValue - Parse a tblgen value. This returns null on error. +/// +/// SimpleValue ::= IDValue +/// SimpleValue ::= INTVAL +/// SimpleValue ::= STRVAL+ +/// SimpleValue ::= CODEFRAGMENT +/// SimpleValue ::= '?' +/// SimpleValue ::= '{' ValueList '}' +/// SimpleValue ::= ID '<' ValueListNE '>' +/// SimpleValue ::= '[' ValueList ']' +/// SimpleValue ::= '(' IDValue DagArgList ')' +/// SimpleValue ::= CONCATTOK '(' Value ',' Value ')' +/// SimpleValue ::= SHLTOK '(' Value ',' Value ')' +/// SimpleValue ::= SRATOK '(' Value ',' Value ')' +/// SimpleValue ::= SRLTOK '(' Value ',' Value ')' +/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')' +/// +Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) { + Init *R = 0; + switch (Lex.getCode()) { + default: TokError("Unknown token when parsing a value"); break; + case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break; + case tgtok::StrVal: { + std::string Val = Lex.getCurStrVal(); + Lex.Lex(); + + // Handle multiple consecutive concatenated strings. 
+ while (Lex.getCode() == tgtok::StrVal) { + Val += Lex.getCurStrVal(); + Lex.Lex(); + } + + R = StringInit::get(Val); + break; + } + case tgtok::CodeFragment: + R = CodeInit::get(Lex.getCurStrVal()); + Lex.Lex(); + break; + case tgtok::question: + R = UnsetInit::get(); + Lex.Lex(); + break; + case tgtok::Id: { + SMLoc NameLoc = Lex.getLoc(); + std::string Name = Lex.getCurStrVal(); + if (Lex.Lex() != tgtok::less) // consume the Id. + return ParseIDValue(CurRec, Name, NameLoc); // Value ::= IDValue + + // Value ::= ID '<' ValueListNE '>' + if (Lex.Lex() == tgtok::greater) { + TokError("expected non-empty value list"); + return 0; + } + + // This is a CLASS<initvalslist> expression. This is supposed to synthesize + // a new anonymous definition, deriving from CLASS<initvalslist> with no + // body. + Record *Class = Records.getClass(Name); + if (!Class) { + Error(NameLoc, "Expected a class name, got '" + Name + "'"); + return 0; + } + + std::vector<Init*> ValueList = ParseValueList(CurRec, Class); + if (ValueList.empty()) return 0; + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' at end of value list"); + return 0; + } + Lex.Lex(); // eat the '>' + + // Create the new record, set it as CurRec temporarily. + static unsigned AnonCounter = 0; + Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++), + NameLoc, + Records); + SubClassReference SCRef; + SCRef.RefLoc = NameLoc; + SCRef.Rec = Class; + SCRef.TemplateArgs = ValueList; + // Add info about the subclass to NewRec. + if (AddSubClass(NewRec, SCRef)) + return 0; + NewRec->resolveReferences(); + Records.addDef(NewRec); + + // The result of the expression is a reference to the new record. + return DefInit::get(NewRec); + } + case tgtok::l_brace: { // Value ::= '{' ValueList '}' + SMLoc BraceLoc = Lex.getLoc(); + Lex.Lex(); // eat the '{' + std::vector<Init*> Vals; + + if (Lex.getCode() != tgtok::r_brace) { + Vals = ParseValueList(CurRec); + if (Vals.empty()) return 0; + } + if (Lex.getCode() != tgtok::r_brace) { + TokError("expected '}' at end of bit list value"); + return 0; + } + Lex.Lex(); // eat the '}' + + SmallVector<Init *, 16> NewBits(Vals.size()); + + for (unsigned i = 0, e = Vals.size(); i != e; ++i) { + Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get()); + if (Bit == 0) { + Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+ + ") is not convertable to a bit"); + return 0; + } + NewBits[Vals.size()-i-1] = Bit; + } + return BitsInit::get(NewBits); + } + case tgtok::l_square: { // Value ::= '[' ValueList ']' + Lex.Lex(); // eat the '[' + std::vector<Init*> Vals; + + RecTy *DeducedEltTy = 0; + ListRecTy *GivenListTy = 0; + + if (ItemType != 0) { + ListRecTy *ListType = dynamic_cast<ListRecTy*>(ItemType); + if (ListType == 0) { + std::stringstream s; + s << "Type mismatch for list, expected list type, got " + << ItemType->getAsString(); + TokError(s.str()); + return 0; + } + GivenListTy = ListType; + } + + if (Lex.getCode() != tgtok::r_square) { + Vals = ParseValueList(CurRec, 0, + GivenListTy ? 
GivenListTy->getElementType() : 0); + if (Vals.empty()) return 0; + } + if (Lex.getCode() != tgtok::r_square) { + TokError("expected ']' at end of list value"); + return 0; + } + Lex.Lex(); // eat the ']' + + RecTy *GivenEltTy = 0; + if (Lex.getCode() == tgtok::less) { + // Optional list element type + Lex.Lex(); // eat the '<' + + GivenEltTy = ParseType(); + if (GivenEltTy == 0) { + // Couldn't parse element type + return 0; + } + + if (Lex.getCode() != tgtok::greater) { + TokError("expected '>' at end of list element type"); + return 0; + } + Lex.Lex(); // eat the '>' + } + + // Check elements + RecTy *EltTy = 0; + for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end(); + i != ie; + ++i) { + TypedInit *TArg = dynamic_cast<TypedInit*>(*i); + if (TArg == 0) { + TokError("Untyped list element"); + return 0; + } + if (EltTy != 0) { + EltTy = resolveTypes(EltTy, TArg->getType()); + if (EltTy == 0) { + TokError("Incompatible types in list elements"); + return 0; + } + } else { + EltTy = TArg->getType(); + } + } + + if (GivenEltTy != 0) { + if (EltTy != 0) { + // Verify consistency + if (!EltTy->typeIsConvertibleTo(GivenEltTy)) { + TokError("Incompatible types in list elements"); + return 0; + } + } + EltTy = GivenEltTy; + } + + if (EltTy == 0) { + if (ItemType == 0) { + TokError("No type for list"); + return 0; + } + DeducedEltTy = GivenListTy->getElementType(); + } else { + // Make sure the deduced type is compatible with the given type + if (GivenListTy) { + if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) { + TokError("Element type mismatch for list"); + return 0; + } + } + DeducedEltTy = EltTy; + } + + return ListInit::get(Vals, DeducedEltTy); + } + case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')' + Lex.Lex(); // eat the '(' + if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) { + TokError("expected identifier in dag init"); + return 0; + } + + Init *Operator = ParseValue(CurRec); + if (Operator == 0) return 0; + + // If the operator name is present, parse it. + std::string OperatorName; + if (Lex.getCode() == tgtok::colon) { + if (Lex.Lex() != tgtok::VarName) { // eat the ':' + TokError("expected variable name in dag operator"); + return 0; + } + OperatorName = Lex.getCurStrVal(); + Lex.Lex(); // eat the VarName. + } + + std::vector<std::pair<llvm::Init*, std::string> > DagArgs; + if (Lex.getCode() != tgtok::r_paren) { + DagArgs = ParseDagArgList(CurRec); + if (DagArgs.empty()) return 0; + } + + if (Lex.getCode() != tgtok::r_paren) { + TokError("expected ')' in dag init"); + return 0; + } + Lex.Lex(); // eat the ')' + + return DagInit::get(Operator, OperatorName, DagArgs); + } + + case tgtok::XHead: + case tgtok::XTail: + case tgtok::XEmpty: + case tgtok::XCast: // Value ::= !unop '(' Value ')' + case tgtok::XConcat: + case tgtok::XSRA: + case tgtok::XSRL: + case tgtok::XSHL: + case tgtok::XEq: + case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')' + case tgtok::XIf: + case tgtok::XForEach: + case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' + return ParseOperation(CurRec); + } + } + + return R; +} + +/// ParseValue - Parse a tblgen value. This returns null on error. +/// +/// Value ::= SimpleValue ValueSuffix* +/// ValueSuffix ::= '{' BitList '}' +/// ValueSuffix ::= '[' BitList ']' +/// ValueSuffix ::= '.' 
ID +/// +Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) { + Init *Result = ParseSimpleValue(CurRec, ItemType); + if (Result == 0) return 0; + + // Parse the suffixes now if present. + while (1) { + switch (Lex.getCode()) { + default: return Result; + case tgtok::l_brace: { + SMLoc CurlyLoc = Lex.getLoc(); + Lex.Lex(); // eat the '{' + std::vector<unsigned> Ranges = ParseRangeList(); + if (Ranges.empty()) return 0; + + // Reverse the bitlist. + std::reverse(Ranges.begin(), Ranges.end()); + Result = Result->convertInitializerBitRange(Ranges); + if (Result == 0) { + Error(CurlyLoc, "Invalid bit range for value"); + return 0; + } + + // Eat the '}'. + if (Lex.getCode() != tgtok::r_brace) { + TokError("expected '}' at end of bit range list"); + return 0; + } + Lex.Lex(); + break; + } + case tgtok::l_square: { + SMLoc SquareLoc = Lex.getLoc(); + Lex.Lex(); // eat the '[' + std::vector<unsigned> Ranges = ParseRangeList(); + if (Ranges.empty()) return 0; + + Result = Result->convertInitListSlice(Ranges); + if (Result == 0) { + Error(SquareLoc, "Invalid range for list slice"); + return 0; + } + + // Eat the ']'. + if (Lex.getCode() != tgtok::r_square) { + TokError("expected ']' at end of list slice"); + return 0; + } + Lex.Lex(); + break; + } + case tgtok::period: + if (Lex.Lex() != tgtok::Id) { // eat the . + TokError("expected field identifier after '.'"); + return 0; + } + if (!Result->getFieldType(Lex.getCurStrVal())) { + TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" + + Result->getAsString() + "'"); + return 0; + } + Result = FieldInit::get(Result, Lex.getCurStrVal()); + Lex.Lex(); // eat field name + break; + } + } +} + +/// ParseDagArgList - Parse the argument list for a dag literal expression. +/// +/// ParseDagArgList ::= Value (':' VARNAME)? +/// ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)? +std::vector<std::pair<llvm::Init*, std::string> > +TGParser::ParseDagArgList(Record *CurRec) { + std::vector<std::pair<llvm::Init*, std::string> > Result; + + while (1) { + Init *Val = ParseValue(CurRec); + if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >(); + + // If the variable name is present, add it. + std::string VarName; + if (Lex.getCode() == tgtok::colon) { + if (Lex.Lex() != tgtok::VarName) { // eat the ':' + TokError("expected variable name in dag literal"); + return std::vector<std::pair<llvm::Init*, std::string> >(); + } + VarName = Lex.getCurStrVal(); + Lex.Lex(); // eat the VarName. + } + + Result.push_back(std::make_pair(Val, VarName)); + + if (Lex.getCode() != tgtok::comma) break; + Lex.Lex(); // eat the ',' + } + + return Result; +} + + +/// ParseValueList - Parse a comma separated list of values, returning them as a +/// vector. Note that this always expects to be able to parse at least one +/// value. It returns an empty list if this is not possible. 
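A hypothetical record using the value forms and suffixes parsed above; "node" and "Forms" are invented names, with "node" standing in for whatever def serves as the dag operator:

    def node;
    def Forms {
      bits<8> Op   = { 0, 1, 0, 1, 0, 1, 0, 1 };   // '{' ValueList '}'
      bits<4> Lo   = Op{3-0};                      // ValueSuffix '{' BitList '}'
      list<int> Ns = [1, 2, 3];                    // '[' ValueList ']'
      dag D        = (node Ns:$xs);                // '(' IDValue DagArgList ')'
    }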
+/// +/// ValueList ::= Value (',' Value) +/// +std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, + RecTy *EltTy) { + std::vector<Init*> Result; + RecTy *ItemType = EltTy; + unsigned int ArgN = 0; + if (ArgsRec != 0 && EltTy == 0) { + const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs(); + const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]); + if (!RV) { + errs() << "Cannot find template arg " << ArgN << " (" << TArgs[ArgN] + << ")\n"; + } + assert(RV && "Template argument record not found??"); + ItemType = RV->getType(); + ++ArgN; + } + Result.push_back(ParseValue(CurRec, ItemType)); + if (Result.back() == 0) return std::vector<Init*>(); + + while (Lex.getCode() == tgtok::comma) { + Lex.Lex(); // Eat the comma + + if (ArgsRec != 0 && EltTy == 0) { + const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs(); + if (ArgN >= TArgs.size()) { + TokError("too many template arguments"); + return std::vector<Init*>(); + } + const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]); + assert(RV && "Template argument record not found??"); + ItemType = RV->getType(); + ++ArgN; + } + Result.push_back(ParseValue(CurRec, ItemType)); + if (Result.back() == 0) return std::vector<Init*>(); + } + + return Result; +} + + +/// ParseDeclaration - Read a declaration, returning the name of field ID, or an +/// empty string on error. This can happen in a number of different context's, +/// including within a def or in the template args for a def (which which case +/// CurRec will be non-null) and within the template args for a multiclass (in +/// which case CurRec will be null, but CurMultiClass will be set). This can +/// also happen within a def that is within a multiclass, which will set both +/// CurRec and CurMultiClass. +/// +/// Declaration ::= FIELD? Type ID ('=' Value)? +/// +std::string TGParser::ParseDeclaration(Record *CurRec, + bool ParsingTemplateArgs) { + // Read the field prefix if present. + bool HasField = Lex.getCode() == tgtok::Field; + if (HasField) Lex.Lex(); + + RecTy *Type = ParseType(); + if (Type == 0) return ""; + + if (Lex.getCode() != tgtok::Id) { + TokError("Expected identifier in declaration"); + return ""; + } + + SMLoc IdLoc = Lex.getLoc(); + std::string DeclName = Lex.getCurStrVal(); + Lex.Lex(); + + if (ParsingTemplateArgs) { + if (CurRec) { + DeclName = CurRec->getName() + ":" + DeclName; + } else { + assert(CurMultiClass); + } + if (CurMultiClass) + DeclName = CurMultiClass->Rec.getName() + "::" + DeclName; + } + + // Add the value. + if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField))) + return ""; + + // If a value is present, parse it. + if (Lex.getCode() == tgtok::equal) { + Lex.Lex(); + SMLoc ValLoc = Lex.getLoc(); + Init *Val = ParseValue(CurRec, Type); + if (Val == 0 || + SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val)) + return ""; + } + + return DeclName; +} + +/// ParseTemplateArgList - Read a template argument list, which is a non-empty +/// sequence of template-declarations in <>'s. If CurRec is non-null, these are +/// template args for a def, which may or may not be in a multiclass. If null, +/// these are the template args for a multiclass. +/// +/// TemplateArgList ::= '<' Declaration (',' Declaration)* '>' +/// +bool TGParser::ParseTemplateArgList(Record *CurRec) { + assert(Lex.getCode() == tgtok::less && "Not a template arg list!"); + Lex.Lex(); // eat the '<' + + Record *TheRecToAddTo = CurRec ? CurRec : &CurMultiClass->Rec; + + // Read the first declaration. 
+ std::string TemplArg = ParseDeclaration(CurRec, true/*templateargs*/); + if (TemplArg.empty()) + return true; + + TheRecToAddTo->addTemplateArg(TemplArg); + + while (Lex.getCode() == tgtok::comma) { + Lex.Lex(); // eat the ',' + + // Read the following declarations. + TemplArg = ParseDeclaration(CurRec, true/*templateargs*/); + if (TemplArg.empty()) + return true; + TheRecToAddTo->addTemplateArg(TemplArg); + } + + if (Lex.getCode() != tgtok::greater) + return TokError("expected '>' at end of template argument list"); + Lex.Lex(); // eat the '>'. + return false; +} + + +/// ParseBodyItem - Parse a single item at within the body of a def or class. +/// +/// BodyItem ::= Declaration ';' +/// BodyItem ::= LET ID OptionalBitList '=' Value ';' +bool TGParser::ParseBodyItem(Record *CurRec) { + if (Lex.getCode() != tgtok::Let) { + if (ParseDeclaration(CurRec, false).empty()) + return true; + + if (Lex.getCode() != tgtok::semi) + return TokError("expected ';' after declaration"); + Lex.Lex(); + return false; + } + + // LET ID OptionalRangeList '=' Value ';' + if (Lex.Lex() != tgtok::Id) + return TokError("expected field identifier after let"); + + SMLoc IdLoc = Lex.getLoc(); + std::string FieldName = Lex.getCurStrVal(); + Lex.Lex(); // eat the field name. + + std::vector<unsigned> BitList; + if (ParseOptionalBitList(BitList)) + return true; + std::reverse(BitList.begin(), BitList.end()); + + if (Lex.getCode() != tgtok::equal) + return TokError("expected '=' in let expression"); + Lex.Lex(); // eat the '='. + + RecordVal *Field = CurRec->getValue(FieldName); + if (Field == 0) + return TokError("Value '" + FieldName + "' unknown!"); + + RecTy *Type = Field->getType(); + + Init *Val = ParseValue(CurRec, Type); + if (Val == 0) return true; + + if (Lex.getCode() != tgtok::semi) + return TokError("expected ';' after let expression"); + Lex.Lex(); + + return SetValue(CurRec, IdLoc, FieldName, BitList, Val); +} + +/// ParseBody - Read the body of a class or def. Return true on error, false on +/// success. +/// +/// Body ::= ';' +/// Body ::= '{' BodyList '}' +/// BodyList BodyItem* +/// +bool TGParser::ParseBody(Record *CurRec) { + // If this is a null definition, just eat the semi and return. + if (Lex.getCode() == tgtok::semi) { + Lex.Lex(); + return false; + } + + if (Lex.getCode() != tgtok::l_brace) + return TokError("Expected ';' or '{' to start body"); + // Eat the '{'. + Lex.Lex(); + + while (Lex.getCode() != tgtok::r_brace) + if (ParseBodyItem(CurRec)) + return true; + + // Eat the '}'. + Lex.Lex(); + return false; +} + +/// ParseObjectBody - Parse the body of a def or class. This consists of an +/// optional ClassList followed by a Body. CurRec is the current def or class +/// that is being parsed. +/// +/// ObjectBody ::= BaseClassList Body +/// BaseClassList ::= /*empty*/ +/// BaseClassList ::= ':' BaseClassListNE +/// BaseClassListNE ::= SubClassRef (',' SubClassRef)* +/// +bool TGParser::ParseObjectBody(Record *CurRec) { + // If there is a baseclass list, read it. + if (Lex.getCode() == tgtok::colon) { + Lex.Lex(); + + // Read all of the subclasses. + SubClassReference SubClass = ParseSubClassReference(CurRec, false); + while (1) { + // Check for error. + if (SubClass.Rec == 0) return true; + + // Add it. + if (AddSubClass(CurRec, SubClass)) + return true; + + if (Lex.getCode() != tgtok::comma) break; + Lex.Lex(); // eat ','. + SubClass = ParseSubClassReference(CurRec, false); + } + } + + // Process any variables on the let stack. 
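The declaration, template-argument, and body-item grammar above supports class definitions such as this hypothetical one ("Inst" is an invented name, reused in a later sketch):

    class Inst<string mnem, bits<4> opc> {
      string AsmString = mnem;     // Declaration ::= Type ID ('=' Value)?
      bit isPseudo = 0;
      field bits<8> Encoding;      // optional FIELD prefix
      let Encoding{7-4} = opc;     // BodyItem ::= LET ID OptionalBitList '=' Value ';'
    }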
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i) + for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) + if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, + LetStack[i][j].Bits, LetStack[i][j].Value)) + return true; + + return ParseBody(CurRec); +} + +/// ParseDef - Parse and return a top level or multiclass def, return the record +/// corresponding to it. This returns null on error. +/// +/// DefInst ::= DEF ObjectName ObjectBody +/// +bool TGParser::ParseDef(MultiClass *CurMultiClass) { + SMLoc DefLoc = Lex.getLoc(); + assert(Lex.getCode() == tgtok::Def && "Unknown tok"); + Lex.Lex(); // Eat the 'def' token. + + // Parse ObjectName and make a record for it. + Record *CurRec = new Record(ParseObjectName(), DefLoc, Records); + + if (!CurMultiClass) { + // Top-level def definition. + + // Ensure redefinition doesn't happen. + if (Records.getDef(CurRec->getName())) { + Error(DefLoc, "def '" + CurRec->getName() + "' already defined"); + return true; + } + Records.addDef(CurRec); + } else { + // Otherwise, a def inside a multiclass, add it to the multiclass. + for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i) + if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName()) { + Error(DefLoc, "def '" + CurRec->getName() + + "' already defined in this multiclass!"); + return true; + } + CurMultiClass->DefPrototypes.push_back(CurRec); + } + + if (ParseObjectBody(CurRec)) + return true; + + if (CurMultiClass == 0) // Def's in multiclasses aren't really defs. + // See Record::setName(). This resolve step will see any new name + // for the def that might have been created when resolving + // inheritance, values and arguments above. + CurRec->resolveReferences(); + + // If ObjectBody has template arguments, it's an error. + assert(CurRec->getTemplateArgs().empty() && "How'd this get template args?"); + + if (CurMultiClass) { + // Copy the template arguments for the multiclass into the def. + const std::vector<std::string> &TArgs = + CurMultiClass->Rec.getTemplateArgs(); + + for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { + const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]); + assert(RV && "Template arg doesn't exist?"); + CurRec->addValue(*RV); + } + } + + return false; +} + +/// ParseClass - Parse a tblgen class definition. +/// +/// ClassInst ::= CLASS ID TemplateArgList? ObjectBody +/// +bool TGParser::ParseClass() { + assert(Lex.getCode() == tgtok::Class && "Unexpected token!"); + Lex.Lex(); + + if (Lex.getCode() != tgtok::Id) + return TokError("expected class name after 'class' keyword"); + + Record *CurRec = Records.getClass(Lex.getCurStrVal()); + if (CurRec) { + // If the body was previously defined, this is an error. + if (!CurRec->getValues().empty() || + !CurRec->getSuperClasses().empty() || + !CurRec->getTemplateArgs().empty()) + return TokError("Class '" + CurRec->getName() + "' already defined"); + } else { + // If this is the first reference to this class, create and add it. + CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records); + Records.addClass(CurRec); + } + Lex.Lex(); // eat the name. + + // If there are template args, parse them. + if (Lex.getCode() == tgtok::less) + if (ParseTemplateArgList(CurRec)) + return true; + + // Finally, parse the object body. + return ParseObjectBody(CurRec); +} + +/// ParseLetList - Parse a non-empty list of assignment expressions into a list +/// of LetRecords. 
+/// +/// LetList ::= LetItem (',' LetItem)* +/// LetItem ::= ID OptionalRangeList '=' Value +/// +std::vector<LetRecord> TGParser::ParseLetList() { + std::vector<LetRecord> Result; + + while (1) { + if (Lex.getCode() != tgtok::Id) { + TokError("expected identifier in let definition"); + return std::vector<LetRecord>(); + } + std::string Name = Lex.getCurStrVal(); + SMLoc NameLoc = Lex.getLoc(); + Lex.Lex(); // Eat the identifier. + + // Check for an optional RangeList. + std::vector<unsigned> Bits; + if (ParseOptionalRangeList(Bits)) + return std::vector<LetRecord>(); + std::reverse(Bits.begin(), Bits.end()); + + if (Lex.getCode() != tgtok::equal) { + TokError("expected '=' in let expression"); + return std::vector<LetRecord>(); + } + Lex.Lex(); // eat the '='. + + Init *Val = ParseValue(0); + if (Val == 0) return std::vector<LetRecord>(); + + // Now that we have everything, add the record. + Result.push_back(LetRecord(Name, Bits, Val, NameLoc)); + + if (Lex.getCode() != tgtok::comma) + return Result; + Lex.Lex(); // eat the comma. + } +} + +/// ParseTopLevelLet - Parse a 'let' at top level. This can be a couple of +/// different related productions. This works inside multiclasses too. +/// +/// Object ::= LET LetList IN '{' ObjectList '}' +/// Object ::= LET LetList IN Object +/// +bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) { + assert(Lex.getCode() == tgtok::Let && "Unexpected token"); + Lex.Lex(); + + // Add this entry to the let stack. + std::vector<LetRecord> LetInfo = ParseLetList(); + if (LetInfo.empty()) return true; + LetStack.push_back(LetInfo); + + if (Lex.getCode() != tgtok::In) + return TokError("expected 'in' at end of top-level 'let'"); + Lex.Lex(); + + // If this is a scalar let, just handle it now + if (Lex.getCode() != tgtok::l_brace) { + // LET LetList IN Object + if (ParseObject(CurMultiClass)) + return true; + } else { // Object ::= LETCommand '{' ObjectList '}' + SMLoc BraceLoc = Lex.getLoc(); + // Otherwise, this is a group let. + Lex.Lex(); // eat the '{'. + + // Parse the object list. + if (ParseObjectList(CurMultiClass)) + return true; + + if (Lex.getCode() != tgtok::r_brace) { + TokError("expected '}' at end of top level let command"); + return Error(BraceLoc, "to match this '{'"); + } + Lex.Lex(); + } + + // Outside this let scope, this let block is not active. + LetStack.pop_back(); + return false; +} + +/// ParseMultiClass - Parse a multiclass definition. +/// +/// MultiClassInst ::= MULTICLASS ID TemplateArgList? +/// ':' BaseMultiClassList '{' MultiClassDef+ '}' +/// +bool TGParser::ParseMultiClass() { + assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token"); + Lex.Lex(); // Eat the multiclass token. + + if (Lex.getCode() != tgtok::Id) + return TokError("expected identifier after multiclass for name"); + std::string Name = Lex.getCurStrVal(); + + if (MultiClasses.count(Name)) + return TokError("multiclass '" + Name + "' already defined"); + + CurMultiClass = MultiClasses[Name] = new MultiClass(Name, + Lex.getLoc(), Records); + Lex.Lex(); // Eat the identifier. + + // If there are template args, parse them. + if (Lex.getCode() == tgtok::less) + if (ParseTemplateArgList(0)) + return true; + + bool inherits = false; + + // If there are submulticlasses, parse them. + if (Lex.getCode() == tgtok::colon) { + inherits = true; + + Lex.Lex(); + + // Read all of the submulticlasses. + SubMultiClassReference SubMultiClass = + ParseSubMultiClassReference(CurMultiClass); + while (1) { + // Check for error. 
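A hypothetical top-level 'let', reusing the Inst class sketched earlier; the two statements follow the productions Object ::= LET LetList IN Object and Object ::= LET LetList IN '{' ObjectList '}':

    let isPseudo = 1 in
      def NOP : Inst<"nop", 0b0000>;
    let isPseudo = 1 in {
      def HLT : Inst<"hlt", 0b1111>;
    }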
+ if (SubMultiClass.MC == 0) return true; + + // Add it. + if (AddSubMultiClass(CurMultiClass, SubMultiClass)) + return true; + + if (Lex.getCode() != tgtok::comma) break; + Lex.Lex(); // eat ','. + SubMultiClass = ParseSubMultiClassReference(CurMultiClass); + } + } + + if (Lex.getCode() != tgtok::l_brace) { + if (!inherits) + return TokError("expected '{' in multiclass definition"); + else if (Lex.getCode() != tgtok::semi) + return TokError("expected ';' in multiclass definition"); + else + Lex.Lex(); // eat the ';'. + } else { + if (Lex.Lex() == tgtok::r_brace) // eat the '{'. + return TokError("multiclass must contain at least one def"); + + while (Lex.getCode() != tgtok::r_brace) { + switch (Lex.getCode()) { + default: + return TokError("expected 'let', 'def' or 'defm' in multiclass body"); + case tgtok::Let: + case tgtok::Def: + case tgtok::Defm: + if (ParseObject(CurMultiClass)) + return true; + break; + } + } + Lex.Lex(); // eat the '}'. + } + + CurMultiClass = 0; + return false; +} + +Record *TGParser:: +InstantiateMulticlassDef(MultiClass &MC, + Record *DefProto, + const std::string &DefmPrefix, + SMLoc DefmPrefixLoc) { + // Add in the defm name. If the defm prefix is empty, give each + // instantiated def a unique name. Otherwise, if "#NAME#" exists in the + // name, substitute the prefix for #NAME#. Otherwise, use the defm name + // as a prefix. + std::string DefName = DefProto->getName(); + if (DefmPrefix.empty()) { + DefName = GetNewAnonymousName(); + } else { + std::string::size_type idx = DefName.find("#NAME#"); + if (idx != std::string::npos) { + DefName.replace(idx, 6, DefmPrefix); + } else { + // Add the suffix to the defm name to get the new name. + DefName = DefmPrefix + DefName; + } + } + + Record *CurRec = new Record(DefName, DefmPrefixLoc, Records); + + SubClassReference Ref; + Ref.RefLoc = DefmPrefixLoc; + Ref.Rec = DefProto; + AddSubClass(CurRec, Ref); + + return CurRec; +} + +bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, + Record *CurRec, + SMLoc DefmPrefixLoc, + SMLoc SubClassLoc, + const std::vector<std::string> &TArgs, + std::vector<Init *> &TemplateVals, + bool DeleteArgs) { + // Loop over all of the template arguments, setting them to the specified + // value or leaving them as the default if necessary. + for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { + // Check if a value is specified for this temp-arg. + if (i < TemplateVals.size()) { + // Set it now. + if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(), + TemplateVals[i])) + return true; + + // Resolve it next. + CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i])); + + if (DeleteArgs) + // Now remove it. + CurRec->removeValue(TArgs[i]); + + } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) { + return Error(SubClassLoc, "value not specified for template argument #"+ + utostr(i) + " (" + TArgs[i] + ") of multiclassclass '" + + MC.Rec.getName() + "'"); + } + } + return false; +} + +bool TGParser::ResolveMulticlassDef(MultiClass &MC, + Record *CurRec, + Record *DefProto, + SMLoc DefmPrefixLoc) { + // If the mdef is inside a 'let' expression, add to each def. + for (unsigned i = 0, e = LetStack.size(); i != e; ++i) + for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) + if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, + LetStack[i][j].Bits, LetStack[i][j].Value)) + return Error(DefmPrefixLoc, "when instantiating this defm"); + + // Ensure redefinition doesn't happen. 
+ if (Records.getDef(CurRec->getName())) + return Error(DefmPrefixLoc, "def '" + CurRec->getName() + + "' already defined, instantiating defm with subdef '" + + DefProto->getName() + "'"); + + // Don't create a top level definition for defm inside multiclasses, + // instead, only update the prototypes and bind the template args + // with the new created definition. + if (CurMultiClass) { + for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); + i != e; ++i) + if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName()) + return Error(DefmPrefixLoc, "defm '" + CurRec->getName() + + "' already defined in this multiclass!"); + CurMultiClass->DefPrototypes.push_back(CurRec); + + // Copy the template arguments for the multiclass into the new def. + const std::vector<std::string> &TA = + CurMultiClass->Rec.getTemplateArgs(); + + for (unsigned i = 0, e = TA.size(); i != e; ++i) { + const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]); + assert(RV && "Template arg doesn't exist?"); + CurRec->addValue(*RV); + } + } else { + Records.addDef(CurRec); + } + + return false; +} + +/// ParseDefm - Parse the instantiation of a multiclass. +/// +/// DefMInst ::= DEFM ID ':' DefmSubClassRef ';' +/// +bool TGParser::ParseDefm(MultiClass *CurMultiClass) { + assert(Lex.getCode() == tgtok::Defm && "Unexpected token!"); + + std::string DefmPrefix; + if (Lex.Lex() == tgtok::Id) { // eat the defm. + DefmPrefix = Lex.getCurStrVal(); + Lex.Lex(); // Eat the defm prefix. + } + + SMLoc DefmPrefixLoc = Lex.getLoc(); + if (Lex.getCode() != tgtok::colon) + return TokError("expected ':' after defm identifier"); + + // Keep track of the new generated record definitions. + std::vector<Record*> NewRecDefs; + + // This record also inherits from a regular class (non-multiclass)? + bool InheritFromClass = false; + + // eat the colon. + Lex.Lex(); + + SMLoc SubClassLoc = Lex.getLoc(); + SubClassReference Ref = ParseSubClassReference(0, true); + + while (1) { + if (Ref.Rec == 0) return true; + + // To instantiate a multiclass, we need to first get the multiclass, then + // instantiate each def contained in the multiclass with the SubClassRef + // template parameters. + MultiClass *MC = MultiClasses[Ref.Rec->getName()]; + assert(MC && "Didn't lookup multiclass correctly?"); + std::vector<Init*> &TemplateVals = Ref.TemplateArgs; + + // Verify that the correct number of template arguments were specified. + const std::vector<std::string> &TArgs = MC->Rec.getTemplateArgs(); + if (TArgs.size() < TemplateVals.size()) + return Error(SubClassLoc, + "more template args specified than multiclass expects"); + + // Loop over all the def's in the multiclass, instantiating each one. + for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) { + Record *DefProto = MC->DefPrototypes[i]; + + Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc); + + if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc, + TArgs, TemplateVals, true/*Delete args*/)) + return Error(SubClassLoc, "could not instantiate def"); + + if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmPrefixLoc)) + return Error(SubClassLoc, "could not instantiate def"); + + NewRecDefs.push_back(CurRec); + } + + + if (Lex.getCode() != tgtok::comma) break; + Lex.Lex(); // eat ','. + + SubClassLoc = Lex.getLoc(); + + // A defm can inherit from regular classes (non-multiclass) as + // long as they come in the end of the inheritance list. 
+ InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != 0); + + if (InheritFromClass) + break; + + Ref = ParseSubClassReference(0, true); + } + + if (InheritFromClass) { + // Process all the classes to inherit as if they were part of a + // regular 'def' and inherit all record values. + SubClassReference SubClass = ParseSubClassReference(0, false); + while (1) { + // Check for error. + if (SubClass.Rec == 0) return true; + + // Get the expanded definition prototypes and teach them about + // the record values the current class to inherit has + for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i) { + Record *CurRec = NewRecDefs[i]; + + // Add it. + if (AddSubClass(CurRec, SubClass)) + return true; + + // Process any variables on the let stack. + for (unsigned i = 0, e = LetStack.size(); i != e; ++i) + for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) + if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, + LetStack[i][j].Bits, LetStack[i][j].Value)) + return true; + } + + if (Lex.getCode() != tgtok::comma) break; + Lex.Lex(); // eat ','. + SubClass = ParseSubClassReference(0, false); + } + } + + if (!CurMultiClass) + for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i) + // See Record::setName(). This resolve step will see any new + // name for the def that might have been created when resolving + // inheritance, values and arguments above. + NewRecDefs[i]->resolveReferences(); + + if (Lex.getCode() != tgtok::semi) + return TokError("expected ';' at end of defm"); + Lex.Lex(); + + return false; +} + +/// ParseObject +/// Object ::= ClassInst +/// Object ::= DefInst +/// Object ::= MultiClassInst +/// Object ::= DefMInst +/// Object ::= LETCommand '{' ObjectList '}' +/// Object ::= LETCommand Object +bool TGParser::ParseObject(MultiClass *MC) { + switch (Lex.getCode()) { + default: + return TokError("Expected class, def, defm, multiclass or let definition"); + case tgtok::Let: return ParseTopLevelLet(MC); + case tgtok::Def: return ParseDef(MC); + case tgtok::Defm: return ParseDefm(MC); + case tgtok::Class: return ParseClass(); + case tgtok::MultiClass: return ParseMultiClass(); + } +} + +/// ParseObjectList +/// ObjectList :== Object* +bool TGParser::ParseObjectList(MultiClass *MC) { + while (isObjectStart(Lex.getCode())) { + if (ParseObject(MC)) + return true; + } + return false; +} + +bool TGParser::ParseFile() { + Lex.Lex(); // Prime the lexer. + if (ParseObjectList()) return true; + + // If we have unread input at the end of the file, report it. + if (Lex.getCode() == tgtok::Eof) + return false; + + return TokError("Unexpected input at top level"); +} + diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h new file mode 100644 index 0000000..db8a620 --- /dev/null +++ b/contrib/llvm/lib/TableGen/TGParser.h @@ -0,0 +1,137 @@ +//===- TGParser.h - Parser for TableGen Files -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the Parser for tablegen files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TGPARSER_H +#define TGPARSER_H + +#include "TGLexer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/SourceMgr.h" +#include <map> + +namespace llvm { + class Record; + class RecordVal; + class RecordKeeper; + class RecTy; + class Init; + struct MultiClass; + struct SubClassReference; + struct SubMultiClassReference; + + struct LetRecord { + std::string Name; + std::vector<unsigned> Bits; + Init *Value; + SMLoc Loc; + LetRecord(const std::string &N, const std::vector<unsigned> &B, Init *V, + SMLoc L) + : Name(N), Bits(B), Value(V), Loc(L) { + } + }; + +class TGParser { + TGLexer Lex; + std::vector<std::vector<LetRecord> > LetStack; + std::map<std::string, MultiClass*> MultiClasses; + + /// CurMultiClass - If we are parsing a 'multiclass' definition, this is the + /// current value. + MultiClass *CurMultiClass; + + // Record tracker + RecordKeeper &Records; +public: + TGParser(SourceMgr &SrcMgr, RecordKeeper &records) : + Lex(SrcMgr), CurMultiClass(0), Records(records) {} + + /// ParseFile - Main entrypoint for parsing a tblgen file. These parser + /// routines return true on error, or false on success. + bool ParseFile(); + + bool Error(SMLoc L, const Twine &Msg) const { + PrintError(L, Msg); + return true; + } + bool TokError(const Twine &Msg) const { + return Error(Lex.getLoc(), Msg); + } + const std::vector<std::string> &getDependencies() const { + return Lex.getDependencies(); + } +private: // Semantic analysis methods. + bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV); + bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName, + const std::vector<unsigned> &BitList, Init *V); + bool AddSubClass(Record *Rec, SubClassReference &SubClass); + bool AddSubMultiClass(MultiClass *CurMC, + SubMultiClassReference &SubMultiClass); + +private: // Parser methods. 
+ bool ParseObjectList(MultiClass *MC = 0); + bool ParseObject(MultiClass *MC); + bool ParseClass(); + bool ParseMultiClass(); + Record *InstantiateMulticlassDef(MultiClass &MC, + Record *DefProto, + const std::string &DefmPrefix, + SMLoc DefmPrefixLoc); + bool ResolveMulticlassDefArgs(MultiClass &MC, + Record *DefProto, + SMLoc DefmPrefixLoc, + SMLoc SubClassLoc, + const std::vector<std::string> &TArgs, + std::vector<Init *> &TemplateVals, + bool DeleteArgs); + bool ResolveMulticlassDef(MultiClass &MC, + Record *CurRec, + Record *DefProto, + SMLoc DefmPrefixLoc); + bool ParseDefm(MultiClass *CurMultiClass); + bool ParseDef(MultiClass *CurMultiClass); + bool ParseTopLevelLet(MultiClass *CurMultiClass); + std::vector<LetRecord> ParseLetList(); + + bool ParseObjectBody(Record *CurRec); + bool ParseBody(Record *CurRec); + bool ParseBodyItem(Record *CurRec); + + bool ParseTemplateArgList(Record *CurRec); + std::string ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs); + + SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm); + SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC); + + Init *ParseIDValue(Record *CurRec); + Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc); + Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0); + Init *ParseValue(Record *CurRec, RecTy *ItemType = 0); + std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0, RecTy *EltTy = 0); + std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *); + bool ParseOptionalRangeList(std::vector<unsigned> &Ranges); + bool ParseOptionalBitList(std::vector<unsigned> &Ranges); + std::vector<unsigned> ParseRangeList(); + bool ParseRangePiece(std::vector<unsigned> &Ranges); + RecTy *ParseType(); + Init *ParseOperation(Record *CurRec); + RecTy *ParseOperatorType(); + std::string ParseObjectName(); + Record *ParseClassID(); + MultiClass *ParseMultiClassID(); + Record *ParseDefmID(); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/TableGen/TableGenBackend.cpp b/contrib/llvm/lib/TableGen/TableGenBackend.cpp new file mode 100644 index 0000000..29588db --- /dev/null +++ b/contrib/llvm/lib/TableGen/TableGenBackend.cpp @@ -0,0 +1,25 @@ +//===- TableGenBackend.cpp - Base class for TableGen Backends ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides useful services for TableGen backends... 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/TableGenBackend.h" +#include "llvm/TableGen/Record.h" +using namespace llvm; + +void TableGenBackend::EmitSourceFileHeader(const std::string &Desc, + raw_ostream &OS) const { + OS << "//===- TableGen'erated file -------------------------------------*-" + " C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate" + "d file, do not edit!\n//\n//===------------------------------------" + "----------------------------------===//\n\n"; +} + diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index 08dc340..16d0da3 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -15,7 +15,7 @@ #ifndef TARGET_ARM_H #define TARGET_ARM_H -#include "ARMBaseInfo.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -29,19 +29,7 @@ class ARMBaseTargetMachine; class FunctionPass; class JITCodeEmitter; class MachineInstr; -class MCCodeEmitter; class MCInst; -class MCInstrInfo; -class MCObjectWriter; -class MCSubtargetInfo; -class TargetAsmBackend; -class formatted_raw_ostream; - -MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -53,7 +41,6 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); -FunctionPass *createNEONMoveFixPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); @@ -61,12 +48,6 @@ FunctionPass *createThumb2SizeReductionPass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); -/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. -MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, - uint32_t CPUSubtype); - } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index cf333cc..5c727ad 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", "Thumb mode">; +def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true", + "Native client mode">; + //===----------------------------------------------------------------------===// // ARM Subtarget features. // @@ -85,12 +88,16 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", /// Some M architectures don't have the DSP extension (v7E-M vs. v7M) def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", - "Supports v7 DSP instructions in Thumb2.">; + "Supports v7 DSP instructions in Thumb2">; // Multiprocessing extension. def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; +// M-series ISA? +def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", + "Is microcontroller profile ('M' series)">; + // ARM ISAs. 
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -105,7 +112,7 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", [HasV5TEOps]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>; + [HasV6Ops, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops]>; @@ -182,12 +189,14 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, // V6M Processors. def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM, - FeatureDB]>; + FeatureDB, FeatureMClass]>; // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, + FeatureDSPThumb2]>; def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, + FeatureDSPThumb2]>; // V7a Processors. def : Processor<"cortex-a8", CortexA8Itineraries, @@ -203,14 +212,14 @@ def : Processor<"cortex-a9-mp", CortexA9Itineraries, // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv]>; + FeatureHWDiv, FeatureMClass]>; // V7EM Processors. def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, FeatureT2XtPk, FeatureVFP2, - FeatureVFPOnlySP]>; + FeatureVFPOnlySP, FeatureMClass]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index dbc3ee4..ea3319f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -15,15 +15,15 @@ #define DEBUG_TYPE "asm-printer" #include "ARM.h" #include "ARMAsmPrinter.h" -#include "ARMAddressingModes.h" #include "ARMBuildAttrs.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" -#include "ARMMCExpr.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "InstPrinter/ARMInstPrinter.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" @@ -45,13 +45,13 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <cctype> using namespace llvm; @@ -92,7 +92,7 @@ namespace { case ARMBuildAttrs::Advanced_SIMD_arch: case ARMBuildAttrs::VFP_arch: Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); - break; + break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } @@ -100,13 +100,41 @@ namespace { }; class ObjectAttributeEmitter : public AttributeEmitter { + // This structure holds all attributes, accounting for + // their string/numeric value, so we can later emmit them + // in declaration order, keeping all in the same vector + struct AttributeItemType { + enum { + HiddenAttribute = 0, + 
NumericAttribute, + TextAttribute + } Type; + unsigned Tag; + unsigned IntValue; + StringRef StringValue; + } AttributeItem; + MCObjectStreamer &Streamer; StringRef CurrentVendor; - SmallString<64> Contents; + SmallVector<AttributeItemType, 64> Contents; + + // Account for the ULEB/String size of each item, + // not just the number of items + size_t ContentsSize; + // FIXME: this should be in a more generic place, but + // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf + size_t getULEBSize(int Value) { + size_t Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); // Is this really necessary? + } while (Value); + return Size; + } public: ObjectAttributeEmitter(MCObjectStreamer &Streamer_) : - Streamer(Streamer_), CurrentVendor("") { } + Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { } void MaybeSwitchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -124,20 +152,32 @@ namespace { } void EmitAttribute(unsigned Attribute, unsigned Value) { - // FIXME: should be ULEB - Contents += Attribute; - Contents += Value; + AttributeItemType attr = { + AttributeItemType::NumericAttribute, + Attribute, + Value, + StringRef("") + }; + ContentsSize += getULEBSize(Attribute); + ContentsSize += getULEBSize(Value); + Contents.push_back(attr); } void EmitTextAttribute(unsigned Attribute, StringRef String) { - Contents += Attribute; - Contents += UppercaseString(String); - Contents += 0; + AttributeItemType attr = { + AttributeItemType::TextAttribute, + Attribute, + 0, + String + }; + ContentsSize += getULEBSize(Attribute); + // String + \0 + ContentsSize += String.size()+1; + + Contents.push_back(attr); } void Finish() { - const size_t ContentsSize = Contents.size(); - // Vendor size + Vendor name + '\0' const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; @@ -151,7 +191,23 @@ namespace { Streamer.EmitIntValue(ARMBuildAttrs::File, 1); Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(Contents, 0); + // Size should have been accounted for already, now + // emit each field as its type (ULEB or String) + for (unsigned int i=0; i<Contents.size(); ++i) { + AttributeItemType item = Contents[i]; + Streamer.EmitULEB128IntValue(item.Tag, 0); + switch (item.Type) { + case AttributeItemType::NumericAttribute: + Streamer.EmitULEB128IntValue(item.IntValue, 0); + break; + case AttributeItemType::TextAttribute: + Streamer.EmitBytes(UppercaseString(item.StringValue), 0); + Streamer.EmitIntValue(0, 1); // '\0' + break; + default: + assert(0 && "Invalid attribute type"); + } + } Contents.clear(); } @@ -184,7 +240,7 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // S registers are described as bit-pieces of a register // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - + unsigned SReg = Reg - ARM::S0; bool odd = SReg & 0x1; unsigned Rx = 256 + (SReg >> 1); @@ -209,12 +265,13 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); // Q registers Q0-Q15 are described by composing two D registers together. 
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) + // DW_OP_piece(8) unsigned QReg = Reg - ARM::Q0; unsigned D1 = 256 + 2 * QReg; unsigned D2 = D1 + 1; - + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); EmitInt8(dwarf::DW_OP_regx); EmitULEB128(D1); @@ -233,6 +290,8 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { } void ARMAsmPrinter::EmitFunctionEntryLabel() { + OutStreamer.ForceCodeRegion(); + if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); OutStreamer.EmitThumbFunc(CurrentFnSym); @@ -395,16 +454,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. - + O << "{" << ARMInstPrinter::getRegisterName(RegBegin); - + // FIXME: The register allocator not only may not have given us the // registers in sequence, but may not be in ascending registers. This // will require changes in the register allocator that'll need to be // propagated down here if the operands change. unsigned RegOps = OpNum + 1; while (MI->getOperand(RegOps).isReg()) { - O << ", " + O << ", " << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg()); RegOps++; } @@ -413,14 +472,34 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } + case 'R': // The most significant register of a pair. + case 'Q': { // The least significant register of a pair. + if (OpNum == 0) + return true; + const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); + if (!FlagsOP.isImm()) + return true; + unsigned Flags = FlagsOP.getImm(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + if (NumVals != 2) + return true; + unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1; + if (RegOp >= MI->getNumOperands()) + return true; + const MachineOperand &MO = MI->getOperand(RegOp); + if (!MO.isReg()) + return true; + unsigned Reg = MO.getReg(); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } + // These modifiers are not yet supported. case 'p': // The high single-precision register of a VFP double-precision // register. case 'e': // The low doubleword register of a NEON quad register. case 'f': // The high doubleword register of a NEON quad register. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. - case 'Q': // The least significant register of a pair. - case 'R': // The most significant register of a pair. case 'H': // The highest-numbered register of a pair. return true; } @@ -437,7 +516,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. - + switch (ExtraCode[0]) { case 'A': // A memory operand for a VLD1/VST1 instruction. default: return true; // Unknown modifier. 
@@ -448,7 +527,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } } - + const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]"; @@ -772,13 +851,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); MCSym = OutContext.GetOrCreateSymbol(OS.str()); } else if (ACPV->isBlockAddress()) { - MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress()); + const BlockAddress *BA = + cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(); + MCSym = GetBlockAddressSymbol(BA); } else if (ACPV->isGlobalValue()) { - const GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); MCSym = GetARMGVSymbol(GV); + } else if (ACPV->isMachineBasicBlock()) { + const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB(); + MCSym = MBB->getSymbol(); } else { assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); - MCSym = GetExternalSymbolSymbol(ACPV->getSymbol()); + const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); + MCSym = GetExternalSymbolSymbol(Sym); } // Create an MCSymbol for the reference. @@ -822,6 +907,9 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); + // Tag the jump table appropriately for precise disassembly. + OutStreamer.EmitJumpTable32Region(); + // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); @@ -847,6 +935,11 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol, OutContext), OutContext); + // If we're generating a table of Thumb addresses in static relocation + // model, we need to add one to keep interworking correctly. + else if (AFI->isThumbFunction()) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext), + OutContext); OutStreamer.EmitValue(Expr, 4); } } @@ -859,6 +952,14 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { unsigned JTI = MO1.getIndex(); // Emit a label for the jump table. 
+ if (MI->getOpcode() == ARM::t2TBB_JT) { + OutStreamer.EmitJumpTable8Region(); + } else if (MI->getOpcode() == ARM::t2TBH_JT) { + OutStreamer.EmitJumpTable16Region(); + } else { + OutStreamer.EmitJumpTable32Region(); + } + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); @@ -881,6 +982,8 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { MCInst BrInst; BrInst.setOpcode(ARM::t2B); BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr)); + BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + BrInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(BrInst); continue; } @@ -994,7 +1097,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { i != NumOps; ++i) RegList.push_back(MI->getOperand(i).getReg()); break; - case ARM::STR_PRE: + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: assert(MI->getOperand(2).getReg() == ARM::SP && "Only stack pointer as a source reg is supported"); RegList.push_back(SrcReg); @@ -1074,10 +1178,20 @@ extern cl::opt<bool> EnableARMEHABI; #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { + if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY) + OutStreamer.EmitCodeRegion(); + + // Emit unwinding stuff for frame-related instructions + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + EmitUnwindingInstruction(MI); + // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(OutStreamer, MI)) return; + assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && + "Pseudo flag setting opcode should be expanded early"); + // Check for manual lowerings. unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -1372,6 +1486,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); EmitAlignment(2); + + // Mark the constant pool entry as data if we're not already in a data + // region. + OutStreamer.EmitDataRegion(); OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; @@ -1379,7 +1497,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MCPE.Val.ConstVal); - return; } case ARM::t2BR_JT: { @@ -1590,6 +1707,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; TmpInst.setOpcode(ARM::tB); TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } { @@ -1804,10 +1923,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - // Emit unwinding stuff for frame-related instructions - if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) - EmitUnwindingInstruction(MI); - OutStreamer.EmitInstruction(TmpInst); } @@ -1815,20 +1930,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Target Registry Stuff //===----------------------------------------------------------------------===// -static MCInstPrinter *createARMMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI); - return 0; -} - // Force static initialization. 
extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); - - TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 649bd7d..408edfc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -13,11 +13,11 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -45,6 +46,10 @@ static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); +static cl::opt<bool> +WidenVMOVS("widen-vmovs", cl::Hidden, + cl::desc("Widen ARM vmovs to vmovd when possible")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { unsigned MLxOpc; // MLA / MLS opcode @@ -171,7 +176,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) .addImm(Pred).addReg(0).addReg(0); } else @@ -399,6 +404,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); int BccOpc = !AFI->isThumbFunction() ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); + bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -406,9 +412,12 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, "ARM branch conditions have two components!"); if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - else + if (Cond.empty()) { // Unconditional branch? + if (isThumb) + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); + else + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); return 1; @@ -417,7 +426,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Two-way conditional branch. 
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + if (isThumb) + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); + else + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; } @@ -627,7 +639,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool SPRDest = ARM::SPRRegClass.contains(DestReg); bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); - unsigned Opc; + unsigned Opc = 0; if (SPRDest && SPRSrc) Opc = ARM::VMOVS; else if (GPRDest && SPRSrc) @@ -638,19 +650,40 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; - else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVQQ; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVQQQQ; - else - llvm_unreachable("Impossible reg-to-reg copy"); - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); - MIB.addReg(SrcReg, getKillRegState(KillSrc)); - if (Opc == ARM::VORRq) + if (Opc) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); MIB.addReg(SrcReg, getKillRegState(KillSrc)); - if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) + if (Opc == ARM::VORRq) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); AddDefaultPred(MIB); + return; + } + + // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. + if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || + ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); + unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? + ARM::qsub_1 : ARM::qsub_3; + for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, i); + unsigned Src = TRI->getSubReg(SrcReg, i); + MachineInstrBuilder Mov = + AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) + .addReg(Dst, RegState::Define) + .addReg(Src, getKillRegState(KillSrc)) + .addReg(Src, getKillRegState(KillSrc))); + if (i == EndSubReg) { + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); + } + } + return; + } + llvm_unreachable("Impossible reg-to-reg copy"); } static const @@ -683,82 +716,84 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. Likewise, rGPR. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass - || RC == ARM::rGPRRegisterClass) - RC = ARM::GPRRegisterClass; - - switch (RC->getID()) { - case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) + switch (RC->getSize()) { + case 4: + if (ARM::GPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::SPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) + } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::DPRRegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::DPR_8RegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) + } else + llvm_unreachable("Unknown reg class!"); + break; + case 8: + if (ARM::DPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::QPRRegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::QPR_8RegClassID: - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) + } else + llvm_unreachable("Unknown reg class!"); + break; + case 16: + if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); - } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) .addMemOperand(MMO)); - } - break; - case ARM::QQPRRegClassID: - case ARM::QQPR_VFP2RegClassID: - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - // FIXME: It's possible to only store part of the QQ register if the - // spilled def has a sub-register index. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. 
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); - } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - } - break; - case ARM::QQQQPRRegClassID: { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); - break; - } - default: - llvm_unreachable("Unknown regclass!"); + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 64: + if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + } else + llvm_unreachable("Unknown reg class!"); + break; + default: + llvm_unreachable("Unknown reg class!"); } } @@ -809,6 +844,12 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } +unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + const MachineMemOperand *Dummy; + return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); +} + void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, @@ -826,72 +867,77 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass - || RC == ARM::rGPRRegisterClass) - RC = ARM::GPRRegisterClass; - - switch (RC->getID()) { - case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + switch (RC->getSize()) { + case 4: + if (ARM::GPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::SPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + + } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::DPRRegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::DPR_8RegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) + case 8: + if (ARM::DPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QPRRegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::QPR_8RegClassID: - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) + case 16: + if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); - } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) - .addFrameIndex(FI) - .addMemOperand(MMO)); - } + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) + .addFrameIndex(FI) + .addMemOperand(MMO)); + } + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QQPRRegClassID: - case ARM::QQPR_VFP2RegClassID: - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); - } else { - MachineInstrBuilder MIB = + } else { + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - } + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + } + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QQQQPRRegClassID: { - MachineInstrBuilder MIB = + case 64: + if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, 
DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + } else + llvm_unreachable("Unknown reg class!"); break; - } default: llvm_unreachable("Unknown regclass!"); } @@ -944,6 +990,78 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } +unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + const MachineMemOperand *Dummy; + return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); +} + +bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ + // This hook gets to expand COPY instructions before they become + // copyPhysReg() calls. Look for VMOVS instructions that can legally be + // widened to VMOVD. We prefer the VMOVD when possible because it may be + // changed into a VORR that can go down the NEON pipeline. + if (!WidenVMOVS || !MI->isCopy()) + return false; + + // Look for a copy between even S-registers. That is where we keep floats + // when using NEON v2f32 instructions for f32 arithmetic. + unsigned DstRegS = MI->getOperand(0).getReg(); + unsigned SrcRegS = MI->getOperand(1).getReg(); + if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) + return false; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, + &ARM::DPRRegClass); + unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, + &ARM::DPRRegClass); + if (!DstRegD || !SrcRegD) + return false; + + // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only + // legal if the COPY already defines the full DstRegD, and it isn't a + // sub-register insertion. + if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) + return false; + + // A dead copy shouldn't show up here, but reject it just in case. + if (MI->getOperand(0).isDead()) + return false; + + // All clear, widen the COPY. + DEBUG(dbgs() << "widening: " << *MI); + + // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg + // or some other super-register. + int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); + if (ImpDefIdx != -1) + MI->RemoveOperand(ImpDefIdx); + + // Change the opcode and operands. + MI->setDesc(get(ARM::VMOVD)); + MI->getOperand(0).setReg(DstRegD); + MI->getOperand(1).setReg(SrcRegD); + AddDefaultPred(MachineInstrBuilder(MI)); + + // We are now reading SrcRegD instead of SrcRegS. 
This may upset the + // register scavenger and machine verifier, so we need to indicate that we + // are reading an undefined value from SrcRegD, but a proper value from + // SrcRegS. + MI->getOperand(1).setIsUndef(); + MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + + // SrcRegD may actually contain an unrelated value in the ssub_1 + // sub-register. Don't kill it. Only kill the ssub_0 sub-register. + if (MI->getOperand(1).isKill()) { + MI->getOperand(1).setIsKill(false); + MI->addRegisterKilled(SrcRegS, TRI, true); + } + + DEBUG(dbgs() << "replaced by: " << *MI); + return true; +} + MachineInstr* ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -974,17 +1092,24 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { // instructions, so that's probably OK, but is PIC always correct when // we get here? if (ACPV->isGlobalValue()) - NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, - ARMCP::CPValue, 4); + NewCPV = ARMConstantPoolConstant:: + Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, + ARMCP::CPValue, 4); else if (ACPV->isExtSymbol()) - NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), - ACPV->getSymbol(), PCLabelId, 4); + NewCPV = ARMConstantPoolSymbol:: + Create(MF.getFunction()->getContext(), + cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); else if (ACPV->isBlockAddress()) - NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, - ARMCP::CPBlockAddress, 4); + NewCPV = ARMConstantPoolConstant:: + Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); else if (ACPV->isLSDA()) - NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, - ARMCP::CPLSDA, 4); + NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); + else if (ACPV->isMachineBasicBlock()) + NewCPV = ARMConstantPoolMBB:: + Create(MF.getFunction()->getContext(), + cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); @@ -1289,7 +1414,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, // Attempt to estimate the relative costs of predication versus branching. unsigned TUnpredCost = Probability.getNumerator() * TCycles; TUnpredCost /= Probability.getDenominator(); - + uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); unsigned FUnpredCost = Comp * FCycles; FUnpredCost /= Probability.getDenominator(); @@ -1330,6 +1455,57 @@ int llvm::getMatchingCondBranchOpcode(int Opc) { } +/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the +/// instruction is encoded with an 'S' bit is determined by the optional CPSR +/// def operand. +/// +/// This will go away once we can teach tblgen how to set the optional CPSR def +/// operand itself. 
+struct AddSubFlagsOpcodePair { + unsigned PseudoOpc; + unsigned MachineOpc; +}; + +static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { + {ARM::ADDSri, ARM::ADDri}, + {ARM::ADDSrr, ARM::ADDrr}, + {ARM::ADDSrsi, ARM::ADDrsi}, + {ARM::ADDSrsr, ARM::ADDrsr}, + + {ARM::SUBSri, ARM::SUBri}, + {ARM::SUBSrr, ARM::SUBrr}, + {ARM::SUBSrsi, ARM::SUBrsi}, + {ARM::SUBSrsr, ARM::SUBrsr}, + + {ARM::RSBSri, ARM::RSBri}, + {ARM::RSBSrr, ARM::RSBrr}, + {ARM::RSBSrsi, ARM::RSBrsi}, + {ARM::RSBSrsr, ARM::RSBrsr}, + + {ARM::t2ADDSri, ARM::t2ADDri}, + {ARM::t2ADDSrr, ARM::t2ADDrr}, + {ARM::t2ADDSrs, ARM::t2ADDrs}, + + {ARM::t2SUBSri, ARM::t2SUBri}, + {ARM::t2SUBSrr, ARM::t2SUBrr}, + {ARM::t2SUBSrs, ARM::t2SUBrs}, + + {ARM::t2RSBSri, ARM::t2RSBri}, + {ARM::t2RSBSrs, ARM::t2RSBrs}, +}; + +unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { + if (OldOpc == OpcPair->PseudoOpc) { + return OpcPair->MachineOpc; + } + } + return 0; +} + void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, @@ -1862,7 +2038,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::STMIB_UPD: case ARM::tLDMIA: case ARM::tLDMIA_UPD: - case ARM::tSTMIA: case ARM::tSTMIA_UPD: case ARM::tPOP_RET: case ARM::tPOP: @@ -2128,7 +2303,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::STMDA_UPD: case ARM::STMDB_UPD: case ARM::STMIB_UPD: - case ARM::tSTMIA: case ARM::tSTMIA_UPD: case ARM::tPOP_RET: case ARM::tPOP: @@ -2567,6 +2741,15 @@ hasLowDefLatency(const InstrItineraryData *ItinData, return false; } +bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const { + if (convertAddSubFlagsOpcode(MI->getOpcode())) { + ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; + return false; + } + return true; +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -2582,3 +2765,66 @@ ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, HasLane = Entry.HasLane; return true; } + +//===----------------------------------------------------------------------===// +// Execution domains. +//===----------------------------------------------------------------------===// +// +// Some instructions go down the NEON pipeline, some go down the VFP pipeline, +// and some can go down both. The vmov instructions go down the VFP pipeline, +// but they can be changed to vorr equivalents that are executed by the NEON +// pipeline. +// +// We use the following execution domain numbering: +// +enum ARMExeDomain { + ExeGeneric = 0, + ExeVFP = 1, + ExeNEON = 2 +}; +// +// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h +// +std::pair<uint16_t, uint16_t> +ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { + // VMOVD is a VFP instruction, but can be changed to NEON if it isn't + // predicated. + if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) + return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + + // No other instructions can be swizzled, so just determine their domain. 
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; + + if (Domain & ARMII::DomainNEON) + return std::make_pair(ExeNEON, 0); + + // Certain instructions can go either way on Cortex-A8. + // Treat them as NEON instructions. + if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) + return std::make_pair(ExeNEON, 0); + + if (Domain & ARMII::DomainVFP) + return std::make_pair(ExeVFP, 0); + + return std::make_pair(ExeGeneric, 0); +} + +void +ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { + // We only know how to change VMOVD into VORR. + assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); + if (Domain != ExeNEON) + return; + + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); + + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 507e897..0f9f321 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -27,146 +27,6 @@ namespace llvm { class ARMSubtarget; class ARMBaseRegisterInfo; -/// ARMII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace ARMII { - enum { - //===------------------------------------------------------------------===// - // Instruction Flags. - - //===------------------------------------------------------------------===// - // This four-bit field describes the addressing mode used. - AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h - - // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load - // and store ops only. Generic "updating" flag is used for ld/st multiple. - // The index mode enums are declared in ARMBaseInfo.h - IndexModeShift = 5, - IndexModeMask = 3 << IndexModeShift, - - //===------------------------------------------------------------------===// - // Instruction encoding formats. 
- // - FormShift = 7, - FormMask = 0x3f << FormShift, - - // Pseudo instructions - Pseudo = 0 << FormShift, - - // Multiply instructions - MulFrm = 1 << FormShift, - - // Branch instructions - BrFrm = 2 << FormShift, - BrMiscFrm = 3 << FormShift, - - // Data Processing instructions - DPFrm = 4 << FormShift, - DPSoRegFrm = 5 << FormShift, - - // Load and Store - LdFrm = 6 << FormShift, - StFrm = 7 << FormShift, - LdMiscFrm = 8 << FormShift, - StMiscFrm = 9 << FormShift, - LdStMulFrm = 10 << FormShift, - - LdStExFrm = 11 << FormShift, - - // Miscellaneous arithmetic instructions - ArithMiscFrm = 12 << FormShift, - SatFrm = 13 << FormShift, - - // Extend instructions - ExtFrm = 14 << FormShift, - - // VFP formats - VFPUnaryFrm = 15 << FormShift, - VFPBinaryFrm = 16 << FormShift, - VFPConv1Frm = 17 << FormShift, - VFPConv2Frm = 18 << FormShift, - VFPConv3Frm = 19 << FormShift, - VFPConv4Frm = 20 << FormShift, - VFPConv5Frm = 21 << FormShift, - VFPLdStFrm = 22 << FormShift, - VFPLdStMulFrm = 23 << FormShift, - VFPMiscFrm = 24 << FormShift, - - // Thumb format - ThumbFrm = 25 << FormShift, - - // Miscelleaneous format - MiscFrm = 26 << FormShift, - - // NEON formats - NGetLnFrm = 27 << FormShift, - NSetLnFrm = 28 << FormShift, - NDupFrm = 29 << FormShift, - NLdStFrm = 30 << FormShift, - N1RegModImmFrm= 31 << FormShift, - N2RegFrm = 32 << FormShift, - NVCVTFrm = 33 << FormShift, - NVDupLnFrm = 34 << FormShift, - N2RegVShLFrm = 35 << FormShift, - N2RegVShRFrm = 36 << FormShift, - N3RegFrm = 37 << FormShift, - N3RegVShFrm = 38 << FormShift, - NVExtFrm = 39 << FormShift, - NVMulSLFrm = 40 << FormShift, - NVTBLFrm = 41 << FormShift, - - //===------------------------------------------------------------------===// - // Misc flags. - - // UnaryDP - Indicates this is a unary data processing instruction, i.e. - // it doesn't have a Rn operand. - UnaryDP = 1 << 13, - - // Xform16Bit - Indicates this Thumb2 instruction may be transformed into - // a 16-bit Thumb instruction if certain conditions are met. - Xform16Bit = 1 << 14, - - //===------------------------------------------------------------------===// - // Code domain. - DomainShift = 15, - DomainMask = 7 << DomainShift, - DomainGeneral = 0 << DomainShift, - DomainVFP = 1 << DomainShift, - DomainNEON = 2 << DomainShift, - DomainNEONA8 = 4 << DomainShift, - - //===------------------------------------------------------------------===// - // Field shifts - such shifts are used to set field while generating - // machine instructions. - // - // FIXME: This list will need adjusting/fixing as the MC code emitter - // takes shape and the ARMCodeEmitter.cpp bits go away. 
- ShiftTypeShift = 4, - - M_BitShift = 5, - ShiftImmShift = 5, - ShiftShift = 7, - N_BitShift = 7, - ImmHiShift = 8, - SoRotImmShift = 8, - RegRsShift = 8, - ExtRotImmShift = 10, - RegRdLoShift = 12, - RegRdShift = 12, - RegRdHiShift = 16, - RegRnShift = 16, - S_BitShift = 20, - W_BitShift = 21, - AM3_I_BitShift = 22, - D_BitShift = 22, - U_BitShift = 23, - P_BitShift = 24, - I_BitShift = 25, - CondShift = 28 - }; -} - class ARMBaseInstrInfo : public ARMGenInstrInfo { const ARMSubtarget &Subtarget; @@ -241,6 +101,10 @@ public: int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -259,6 +123,8 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -346,6 +212,12 @@ public: int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; + + /// VFP/NEON execution domains. + std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr *MI) const; + void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + private: int getVLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -382,6 +254,9 @@ private: bool hasLowDefLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx) const; + /// verifyInstruction - Perform target specific instruction verification. + bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. @@ -464,6 +339,12 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); + +/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether +/// the instruction is encoded with an 'S' bit is determined by the optional +/// CPSR def operand. +unsigned convertAddSubFlagsOpcode(unsigned OldOpc); + /// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of /// instructions to materializea destreg = basereg + immediate in ARM / Thumb2 /// code. 
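The ARMII block removed from this header packs per-instruction properties into TSFlags as shifted bit-fields, and getExecutionDomain in the .cpp hunk above reads the code-domain field back out with DomainMask. Since the .cpp still references ARMII::DomainMask, the definitions evidently now live elsewhere, presumably in the new MCTargetDesc headers. A self-contained sketch of that shift-and-mask extraction, reusing the field layout shown in the removed block; the helper name domainOf is made up:

    #include <cstdint>

    enum {
      DomainShift   = 15,
      DomainMask    = 7 << DomainShift,
      DomainGeneral = 0 << DomainShift,
      DomainVFP     = 1 << DomainShift,
      DomainNEON    = 2 << DomainShift,
      DomainNEONA8  = 4 << DomainShift
    };

    // Pull the 3-bit code-domain field out of a packed TSFlags word.
    unsigned domainOf(uint64_t TSFlags) {
      return unsigned(TSFlags & DomainMask) >> DomainShift;
    }
    // domainOf(f) == 1 means VFP, 2 means NEON, 4 is the NEON/Cortex-A8 case.

The same pattern, one Shift constant plus one Mask constant per field, covers the addressing-mode and instruction-format fields in the same block.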
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index ba42295..7c42342 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/Debug.h" @@ -57,7 +56,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(), TII(tii), STI(sti), + : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -354,7 +353,7 @@ const TargetRegisterClass* ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const { const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { switch (Super->getID()) { case ARM::GPRRegClassID: @@ -375,6 +374,13 @@ ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } +const TargetRegisterClass * +ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &ARM::CCRRegClass) + return 0; // Can't copy CCR registers. 
+ return RC; +} + unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { @@ -487,19 +493,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven1); + return makeArrayRef(GPREven1); else - return ArrayRef<unsigned>(GPREven4); + return makeArrayRef(GPREven4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven2); + return makeArrayRef(GPREven2); else - return ArrayRef<unsigned>(GPREven5); + return makeArrayRef(GPREven5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven3); + return makeArrayRef(GPREven3); else - return ArrayRef<unsigned>(GPREven6); + return makeArrayRef(GPREven6); } } else if (HintType == ARMRI::RegPairOdd) { if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) @@ -509,19 +515,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd1); + return makeArrayRef(GPROdd1); else - return ArrayRef<unsigned>(GPROdd4); + return makeArrayRef(GPROdd4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd2); + return makeArrayRef(GPROdd2); else - return ArrayRef<unsigned>(GPROdd5); + return makeArrayRef(GPROdd5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd3); + return makeArrayRef(GPROdd3); else - return ArrayRef<unsigned>(GPROdd6); + return makeArrayRef(GPROdd6); } } return RC->getRawAllocationOrder(MF); @@ -649,10 +655,6 @@ cannotEliminateFrame(const MachineFunction &MF) const { || needsStackRealignment(MF); } -unsigned ARMBaseRegisterInfo::getRARegister() const { - return ARM::LR; -} - unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -672,99 +674,54 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { return 0; } -int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} - unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. // So no R12, etc. - case ARM::R1: - return ARM::R0; - case ARM::R3: - return ARM::R2; - case ARM::R5: - return ARM::R4; + case ARM::R1: return ARM::R0; + case ARM::R3: return ARM::R2; + case ARM::R5: return ARM::R4; case ARM::R7: return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) ? 0 : ARM::R6; - case ARM::R9: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: - return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R10; - - case ARM::S1: - return ARM::S0; - case ARM::S3: - return ARM::S2; - case ARM::S5: - return ARM::S4; - case ARM::S7: - return ARM::S6; - case ARM::S9: - return ARM::S8; - case ARM::S11: - return ARM::S10; - case ARM::S13: - return ARM::S12; - case ARM::S15: - return ARM::S14; - case ARM::S17: - return ARM::S16; - case ARM::S19: - return ARM::S18; - case ARM::S21: - return ARM::S20; - case ARM::S23: - return ARM::S22; - case ARM::S25: - return ARM::S24; - case ARM::S27: - return ARM::S26; - case ARM::S29: - return ARM::S28; - case ARM::S31: - return ARM::S30; - - case ARM::D1: - return ARM::D0; - case ARM::D3: - return ARM::D2; - case ARM::D5: - return ARM::D4; - case ARM::D7: - return ARM::D6; - case ARM::D9: - return ARM::D8; - case ARM::D11: - return ARM::D10; - case ARM::D13: - return ARM::D12; - case ARM::D15: - return ARM::D14; - case ARM::D17: - return ARM::D16; - case ARM::D19: - return ARM::D18; - case ARM::D21: - return ARM::D20; - case ARM::D23: - return ARM::D22; - case ARM::D25: - return ARM::D24; - case ARM::D27: - return ARM::D26; - case ARM::D29: - return ARM::D28; - case ARM::D31: - return ARM::D30; + case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: return ARM::S0; + case ARM::S3: return ARM::S2; + case ARM::S5: return ARM::S4; + case ARM::S7: return ARM::S6; + case ARM::S9: return ARM::S8; + case ARM::S11: return ARM::S10; + case ARM::S13: return ARM::S12; + case ARM::S15: return ARM::S14; + case ARM::S17: return ARM::S16; + case ARM::S19: return ARM::S18; + case ARM::S21: return ARM::S20; + case ARM::S23: return ARM::S22; + case ARM::S25: return ARM::S24; + case ARM::S27: return ARM::S26; + case ARM::S29: return ARM::S28; + case ARM::S31: return ARM::S30; + + case ARM::D1: return ARM::D0; + case ARM::D3: return ARM::D2; + case ARM::D5: return ARM::D4; + case ARM::D7: return ARM::D6; + case ARM::D9: return ARM::D8; + case ARM::D11: return ARM::D10; + case ARM::D13: return ARM::D12; + case ARM::D15: return ARM::D14; + case ARM::D17: return ARM::D16; + case ARM::D19: return ARM::D18; + case ARM::D21: return ARM::D20; + case ARM::D23: return ARM::D22; + case ARM::D25: return ARM::D24; + case ARM::D27: return ARM::D26; + case ARM::D29: return ARM::D28; + case ARM::D31: return ARM::D30; } return 0; @@ -776,85 +733,48 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, default: break; // Return 0 if either register of the pair is a special register. // So no R12, etc. - case ARM::R0: - return ARM::R1; - case ARM::R2: - return ARM::R3; - case ARM::R4: - return ARM::R5; + case ARM::R0: return ARM::R1; + case ARM::R2: return ARM::R3; + case ARM::R4: return ARM::R5; case ARM::R6: return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) ? 0 : ARM::R7; - case ARM::R8: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: - return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; - - case ARM::S0: - return ARM::S1; - case ARM::S2: - return ARM::S3; - case ARM::S4: - return ARM::S5; - case ARM::S6: - return ARM::S7; - case ARM::S8: - return ARM::S9; - case ARM::S10: - return ARM::S11; - case ARM::S12: - return ARM::S13; - case ARM::S14: - return ARM::S15; - case ARM::S16: - return ARM::S17; - case ARM::S18: - return ARM::S19; - case ARM::S20: - return ARM::S21; - case ARM::S22: - return ARM::S23; - case ARM::S24: - return ARM::S25; - case ARM::S26: - return ARM::S27; - case ARM::S28: - return ARM::S29; - case ARM::S30: - return ARM::S31; - - case ARM::D0: - return ARM::D1; - case ARM::D2: - return ARM::D3; - case ARM::D4: - return ARM::D5; - case ARM::D6: - return ARM::D7; - case ARM::D8: - return ARM::D9; - case ARM::D10: - return ARM::D11; - case ARM::D12: - return ARM::D13; - case ARM::D14: - return ARM::D15; - case ARM::D16: - return ARM::D17; - case ARM::D18: - return ARM::D19; - case ARM::D20: - return ARM::D21; - case ARM::D22: - return ARM::D23; - case ARM::D24: - return ARM::D25; - case ARM::D26: - return ARM::D27; - case ARM::D28: - return ARM::D29; - case ARM::D30: - return ARM::D31; + case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + + case ARM::S0: return ARM::S1; + case ARM::S2: return ARM::S3; + case ARM::S4: return ARM::S5; + case ARM::S6: return ARM::S7; + case ARM::S8: return ARM::S9; + case ARM::S10: return ARM::S11; + case ARM::S12: return ARM::S13; + case ARM::S14: return ARM::S15; + case ARM::S16: return ARM::S17; + case ARM::S18: return ARM::S19; + case ARM::S20: return ARM::S21; + case ARM::S22: return ARM::S23; + case ARM::S24: return ARM::S25; + case ARM::S26: return ARM::S27; + case ARM::S28: return ARM::S29; + case ARM::S30: return ARM::S31; + + case ARM::D0: return ARM::D1; + case ARM::D2: return ARM::D3; + case ARM::D4: return ARM::D5; + case ARM::D6: return ARM::D7; + case ARM::D8: return ARM::D9; + case ARM::D10: return ARM::D11; + case ARM::D12: return ARM::D13; + case ARM::D14: return ARM::D15; + case ARM::D16: return ARM::D17; + case ARM::D18: return ARM::D19; + case ARM::D20: return ARM::D21; + case ARM::D22: return ARM::D23; + case ARM::D24: return ARM::D25; + case ARM::D26: return ARM::D27; + case ARM::D28: return ARM::D29; + case ARM::D30: return ARM::D31; } return 0; @@ -1111,11 +1031,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); - MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset); + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset)); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(AddDefaultPred(MIB)); + AddDefaultCC(MIB); } void @@ -1143,6 +1063,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); } assert (Done && "Unable to resolve frame index!"); + (void)Done; } bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index b4b4059..fee17ff 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -33,19 +33,6 @@ namespace ARMRI { }; } -/// isARMLowRegister - Returns true if the register is low register 
r0-r7. -/// -static inline bool isARMLowRegister(unsigned Reg) { - using namespace ARM; - switch (Reg) { - case R0: case R1: case R2: case R3: - case R4: case R5: case R6: case R7: - return true; - default: - return false; - } -} - /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { @@ -129,6 +116,8 @@ public: unsigned &NewSubIdx) const; const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getCrossCopyRegClass(const TargetRegisterClass *RC) const; const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; @@ -164,7 +153,6 @@ public: bool cannotEliminateFrame(const MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getBaseRegister() const { return BasePtr; } @@ -172,9 +160,6 @@ public: unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - bool isLowRegister(unsigned Reg) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index d6fca62..4148d4a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "jit" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" #include "ARMInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -161,11 +161,11 @@ namespace { // are already handled elsewhere. They are placeholders to allow this // encoder to continue to function until the MC encoder is sufficiently // far along that this one can be eliminated entirely. 
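The getRegisterPairEven/getRegisterPairOdd hunks above only reflow the large switch statements onto single lines, but the S- and D-register cases they contain follow a fixed rule: each register is paired with its neighbour so that the pair starts on an even index. A standalone sketch of that arithmetic on plain indices (S0..S31 viewed as 0..31); this is only an illustration of the mapping, not how the LLVM code looks it up:

    // Even member of the pair containing Idx (index 5 -> 4, 4 -> 4).
    unsigned pairEven(unsigned Idx) { return Idx & ~1u; }

    // Odd member of the pair containing Idx (index 4 -> 5, 5 -> 5).
    unsigned pairOdd(unsigned Idx)  { return Idx | 1u; }

The GPR cases cannot use this shortcut because some pairs are broken by reserved registers (R9, R11, the frame pointer), which is why those cases keep their isReservedReg checks.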
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) + unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) const { return 0; } - unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) + unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } - unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) + unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { return 0; } @@ -189,13 +189,17 @@ namespace { unsigned Op) const { return 0; } unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op) + unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } @@ -203,8 +207,12 @@ namespace { const { return 0; } unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) + const { return 0; } unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op) @@ -213,10 +221,6 @@ namespace { const { return 0; } unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) @@ -230,8 +234,6 @@ namespace { const { return 0; } unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getMsbOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } unsigned getSsatBitPosValue(const MachineInstr &MI, unsigned Op) const { return 0; } uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx) @@ -268,6 +270,8 @@ namespace { const { return 0;} uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0;} + uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0;} uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) @@ -632,15 +636,16 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - const GlobalValue *GV = 
ACPV->getGV(); + const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, isa<Function>(GV), Subtarget->GVIsIndirectSymbol(GV, RelocM), (intptr_t)ACPV); - } else { - emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); + } else { + const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); + emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); } emitWordLE(0); } else { @@ -983,7 +988,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const MCInstrDesc &MCID) const { - for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){ + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index f45ebdc..3e3a413 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -15,10 +15,10 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -739,7 +739,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; - BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); + if (!isThumb) + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); + else + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB) + .addImm(ARMCC::AL).addReg(0); ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -1151,7 +1155,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // targets will be exchanged, and the altered branch may be out of // range, so the machinery has to know about it. int UncondBr = isThumb ? ((isThumb2) ? 
ARM::t2B : ARM::tB) : ARM::B; - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); @@ -1512,7 +1520,11 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); + if (isThumb) + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) + .addImm(ARMCC::AL).addReg(0); + else + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); @@ -1891,7 +1903,8 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); - BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB); + BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB) + .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. MF.RenumberBlocks(NewBB); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 165a1d8..aadfd47 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,79 +17,57 @@ #include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Type.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, - ARMCP::ARMCPKind K, +//===----------------------------------------------------------------------===// +// ARMConstantPoolValue +//===----------------------------------------------------------------------===// + +ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id, + ARMCP::ARMCPKind kind, unsigned char PCAdj, - ARMCP::ARMCPModifier Modif, - bool AddCA) - : MachineConstantPoolValue((const Type*)cval->getType()), - CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), - Modifier(Modif), AddCurrentAddress(AddCA) {} - -ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, - const char *s, unsigned id, + ARMCP::ARMCPModifier modifier, + bool addCurrentAddress) + : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind), + PCAdjust(PCAdj), Modifier(modifier), + AddCurrentAddress(addCurrentAddress) {} + +ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id, + ARMCP::ARMCPKind kind, unsigned char PCAdj, - ARMCP::ARMCPModifier Modif, - bool AddCA) - : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), - CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), - PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} - -ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, - ARMCP::ARMCPModifier Modif) - : MachineConstantPoolValue((const 
Type*)Type::getInt32Ty(gv->getContext())), - CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), - Modifier(Modif), AddCurrentAddress(false) {} - -const GlobalValue *ARMConstantPoolValue::getGV() const { - return dyn_cast_or_null<GlobalValue>(CVal); -} + ARMCP::ARMCPModifier modifier, + bool addCurrentAddress) + : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)), + LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier), + AddCurrentAddress(addCurrentAddress) {} -const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { - return dyn_cast_or_null<BlockAddress>(CVal); -} +ARMConstantPoolValue::~ARMConstantPoolValue() {} -static bool CPV_streq(const char *S1, const char *S2) { - if (S1 == S2) - return true; - if (S1 && S2 && strcmp(S1, S2) == 0) - return true; - return false; +const char *ARMConstantPoolValue::getModifierText() const { + switch (Modifier) { + default: llvm_unreachable("Unknown modifier!"); + // FIXME: Are these case sensitive? It'd be nice to lower-case all the + // strings if that's legal. + case ARMCP::no_modifier: return "none"; + case ARMCP::TLSGD: return "tlsgd"; + case ARMCP::GOT: return "GOT"; + case ARMCP::GOTOFF: return "GOTOFF"; + case ARMCP::GOTTPOFF: return "gottpoff"; + case ARMCP::TPOFF: return "tpoff"; + } } int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - if (CPV->CVal == CVal && - CPV->LabelId == LabelId && - CPV->PCAdjust == PCAdjust && - CPV_streq(CPV->S, S) && - CPV->Modifier == Modifier) - return i; - } - } - + assert(false && "Shouldn't be calling this directly!"); return -1; } -ARMConstantPoolValue::~ARMConstantPoolValue() { - free((void*)S); -} - void -ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(CVal); - ID.AddPointer(S); +ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddInteger(LabelId); ID.AddInteger(PCAdjust); } @@ -97,9 +75,7 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { bool ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { if (ACPV->Kind == Kind && - ACPV->CVal == CVal && ACPV->PCAdjust == PCAdjust && - CPV_streq(ACPV->S, S) && ACPV->Modifier == Modifier) { if (ACPV->LabelId == LabelId) return true; @@ -115,12 +91,7 @@ void ARMConstantPoolValue::dump() const { errs() << " " << *this; } - void ARMConstantPoolValue::print(raw_ostream &O) const { - if (CVal) - O << CVal->getName(); - else - O << S; if (Modifier) O << "(" << getModifierText() << ")"; if (PCAdjust != 0) { O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; @@ -128,3 +99,221 @@ void ARMConstantPoolValue::print(raw_ostream &O) const { O << ")"; } } + +//===----------------------------------------------------------------------===// +// ARMConstantPoolConstant +//===----------------------------------------------------------------------===// + +ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty, + const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress), + CVal(C) {} + 
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier, + AddCurrentAddress), + CVal(C) {} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) { + return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0, + ARMCP::no_modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const GlobalValue *GV, + ARMCP::ARMCPModifier Modifier) { + return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()), + GV, 0, ARMCP::CPValue, 0, + Modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, unsigned char PCAdj) { + return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, + ARMCP::no_modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) { + return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier, + AddCurrentAddress); +} + +const GlobalValue *ARMConstantPoolConstant::getGV() const { + return dyn_cast_or_null<GlobalValue>(CVal); +} + +const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const { + return dyn_cast_or_null<BlockAddress>(CVal); +} + +int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV); + if (!APC) continue; + if (APC->CVal == CVal && equals(APC)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV); + return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(CVal); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolConstant::print(raw_ostream &O) const { + O << CVal->getName(); + ARMConstantPoolValue::print(O); +} + +//===----------------------------------------------------------------------===// +// ARMConstantPoolSymbol +//===----------------------------------------------------------------------===// + +ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, + unsigned id, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, + AddCurrentAddress), + S(strdup(s)) {} + +ARMConstantPoolSymbol::~ARMConstantPoolSymbol() { + free((void*)S); +} + +ARMConstantPoolSymbol * +ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, + unsigned ID, unsigned char PCAdj) { + return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); +} + +static bool CPV_streq(const char *S1, const char *S2) { + if (S1 == S2) + return true; + if (S1 && S2 && strcmp(S1, S2) == 
0) + return true; + return false; +} + +int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); + if (!APS) continue; + + if (CPV_streq(APS->S, S) && equals(APS)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV); + return ACPS && CPV_streq(ACPS->S, S) && + ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(S); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolSymbol::print(raw_ostream &O) const { + O << S; + ARMConstantPoolValue::print(O); +} + +//===----------------------------------------------------------------------===// +// ARMConstantPoolMBB +//===----------------------------------------------------------------------===// + +ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned id, unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj, + Modifier, AddCurrentAddress), + MBB(mbb) {} + +ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned ID, + unsigned char PCAdj) { + return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false); +} + +int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV); + if (!APMBB) continue; + + if (APMBB->MBB == MBB && equals(APMBB)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV); + return ACPMBB && ACPMBB->MBB == MBB && + ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(MBB); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolMBB::print(raw_ostream &O) const { + ARMConstantPoolValue::print(O); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index d008811..0d0def3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -20,17 +20,19 @@ namespace llvm { -class Constant; class BlockAddress; +class Constant; class GlobalValue; class LLVMContext; +class MachineBasicBlock; namespace ARMCP { enum ARMCPKind { CPValue, CPExtSymbol, CPBlockAddress, - CPLSDA + CPLSDA, + CPMachineBasicBlock }; enum ARMCPModifier { 
@@ -47,8 +49,6 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - const Constant *CVal; // Constant being loaded. - const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative. @@ -56,60 +56,54 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) bool AddCurrentAddress; +protected: + ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + + ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); public: - ARMConstantPoolValue(const Constant *cval, unsigned id, - ARMCP::ARMCPKind Kind = ARMCP::CPValue, - unsigned char PCAdj = 0, - ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, - bool AddCurrentAddress = false); - ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, - unsigned char PCAdj = 0, - ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, - bool AddCurrentAddress = false); - ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier); - ARMConstantPoolValue(); - ~ARMConstantPoolValue(); + virtual ~ARMConstantPoolValue(); - const GlobalValue *getGV() const; - const char *getSymbol() const { return S; } - const BlockAddress *getBlockAddress() const; ARMCP::ARMCPModifier getModifier() const { return Modifier; } - const char *getModifierText() const { - switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); - // FIXME: Are these case sensitive? It'd be nice to lower-case all the - // strings if that's legal. - case ARMCP::no_modifier: return "none"; - case ARMCP::TLSGD: return "tlsgd"; - case ARMCP::GOT: return "GOT"; - case ARMCP::GOTOFF: return "GOTOFF"; - case ARMCP::GOTTPOFF: return "gottpoff"; - case ARMCP::TPOFF: return "tpoff"; - } - } + const char *getModifierText() const; bool hasModifier() const { return Modifier != ARMCP::no_modifier; } + bool mustAddCurrentAddress() const { return AddCurrentAddress; } + unsigned getLabelId() const { return LabelId; } unsigned char getPCAdjustment() const { return PCAdjust; } + bool isGlobalValue() const { return Kind == ARMCP::CPValue; } bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; } - bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; } - bool isLSDA() { return Kind == ARMCP::CPLSDA; } + bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } + bool isLSDA() const { return Kind == ARMCP::CPLSDA; } + bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } virtual unsigned getRelocationInfo() const { return 2; } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); - virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); - /// hasSameValue - Return true if this ARM constpool value - /// can share the same constantpool entry as another ARM constpool value. - bool hasSameValue(ARMConstantPoolValue *ACPV); + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. 
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + bool equals(const ARMConstantPoolValue *A) const { + return this->LabelId == A->LabelId && + this->PCAdjust == A->PCAdjust && + this->Modifier == A->Modifier; + } + virtual void print(raw_ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } - void print(raw_ostream &O) const; void dump() const; + + static bool classof(const ARMConstantPoolValue *) { return true; } }; inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { @@ -117,6 +111,123 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { return O; } +/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants, +/// Functions, and BlockAddresses. +class ARMConstantPoolConstant : public ARMConstantPoolValue { + const Constant *CVal; // Constant being loaded. + + ARMConstantPoolConstant(const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + ARMConstantPoolConstant(Type *Ty, const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID); + static ARMConstantPoolConstant *Create(const GlobalValue *GV, + ARMCP::ARMCPModifier Modifier); + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj); + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + + const GlobalValue *getGV() const; + const BlockAddress *getBlockAddress() const; + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. + virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + virtual void print(raw_ostream &O) const; + static bool classof(const ARMConstantPoolValue *APV) { + return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); + } + static bool classof(const ARMConstantPoolConstant *) { return true; } +}; + +/// ARMConstantPoolSymbol - ARM-specific constantpool values for external +/// symbols. +class ARMConstantPoolSymbol : public ARMConstantPoolValue { + const char *S; // ExtSymbol being loaded. + + ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + ~ARMConstantPoolSymbol(); + + static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, + unsigned ID, unsigned char PCAdj); + + const char *getSymbol() const { return S; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. 
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void print(raw_ostream &O) const; + + static bool classof(const ARMConstantPoolValue *ACPV) { + return ACPV->isExtSymbol(); + } + static bool classof(const ARMConstantPoolSymbol *) { return true; } +}; + +/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic +/// block. +class ARMConstantPoolMBB : public ARMConstantPoolValue { + const MachineBasicBlock *MBB; // Machine basic block. + + ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + static ARMConstantPoolMBB *Create(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned ID, unsigned char PCAdj); + + const MachineBasicBlock *getMBB() const { return MBB; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. + virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void print(raw_ostream &O) const; + + static bool classof(const ARMConstantPoolValue *ACPV) { + return ACPV->isMachineBasicBlock(); + } + static bool classof(const ARMConstantPoolMBB *) { return true; } +}; + } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 94b72fd..7872cb9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -16,19 +16,24 @@ #define DEBUG_TYPE "arm-pseudo" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! using namespace llvm; +static cl::opt<bool> +VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, + cl::desc("Verify machine code after expanding ARM pseudos")); + namespace { class ARMExpandPseudo : public MachineFunctionPass { public: @@ -741,8 +746,22 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::MOVCCs: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + case ARM::MOVCCsi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + (MI.getOperand(1).getReg())) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .addReg(MI.getOperand(5).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + + case ARM::MOVCCsr: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) .addReg(MI.getOperand(2).getReg(), getKillRegState(MI.getOperand(2).isKill())) @@ -837,10 +856,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { // These are just fancy MOVs insructions. 
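The ARMConstantPoolValue.h changes above split the old catch-all class into a base class plus ARMConstantPoolConstant, ARMConstantPoolSymbol, and ARMConstantPoolMBB, each carrying a classof() so the rest of the backend can use dyn_cast<> on a generic entry (for example cast<ARMConstantPoolConstant>(ACPV)->getGV() in the code-emitter hunk). A self-contained sketch of that custom-RTTI pattern with simplified stand-in types; the names are illustrative, and the hand-written asSymbol() plays the role LLVM's dyn_cast template fills:

    #include <cstddef>

    struct CPValue {
      enum Kind { KConstant, KSymbol, KBasicBlock };
      Kind TheKind;
      explicit CPValue(Kind K) : TheKind(K) {}
      virtual ~CPValue() {}
    };

    struct CPSymbol : CPValue {
      const char *Sym;
      explicit CPSymbol(const char *S) : CPValue(KSymbol), Sym(S) {}
      // isa/dyn_cast key off this predicate rather than C++ RTTI.
      static bool classof(const CPValue *V) { return V->TheKind == KSymbol; }
    };

    // Roughly what dyn_cast<CPSymbol>(V) expands to.
    CPSymbol *asSymbol(CPValue *V) {
      return CPSymbol::classof(V) ? static_cast<CPSymbol *>(V) : NULL;
    }

Keeping the kind discriminator in the base class is also what lets each subclass override getExistingMachineCPValue and hasSameValue while the shared fields (label id, PC adjustment, modifier) stay behind the common equals() helper.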
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) - .addReg(0) .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))) @@ -851,10 +869,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(1)) .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) .addReg(0); TransferImpOps(MI, MIB, MIB); @@ -953,34 +970,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandMOV32BitImm(MBB, MBBI); return true; - case ARM::VMOVQQ: { - unsigned DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0); - unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1); - unsigned SrcReg = MI.getOperand(1).getReg(); - bool SrcIsKill = MI.getOperand(1).isKill(); - unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0); - unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); - MachineInstrBuilder Even = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VORRq)) - .addReg(EvenDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); - MachineInstrBuilder Odd = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VORRq)) - .addReg(OddDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); - TransferImpOps(MI, Even, Odd); - MI.eraseFromParent(); - return true; - } - case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = @@ -1316,6 +1305,8 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) Modified |= ExpandMBB(*MFI); + if (VerifyARMPseudo) + MF.verify(this, "After expanding ARM pseudo instructions."); return Modified; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index f469d7e..9bc7ef2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" #include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" @@ -171,8 +171,8 @@ class ARMFastISel : public FastISel { // Utility routines. 
private: - bool isTypeLegal(const Type *Ty, MVT &VT); - bool isLoadTypeLegal(const Type *Ty, MVT &VT); + bool isTypeLegal(Type *Ty, MVT &VT); + bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -502,11 +502,19 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { // This checks to see if we can use VFP3 instructions to materialize // a constant, otherwise we have to go through the constant pool. if (TLI.isFPImmLegal(Val, VT)) { - unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; + int Imm; + unsigned Opc; + if (is64bit) { + Imm = ARM_AM::getFP64Imm(Val); + Opc = ARM::FCONSTD; + } else { + Imm = ARM_AM::getFP32Imm(Val); + Opc = ARM::FCONSTS; + } unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addFPImm(CFP)); + .addImm(Imm)); return DestReg; } @@ -590,8 +598,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, - ARMCP::CPValue, PCAdj); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, + ARMCP::CPValue, + PCAdj); unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); // Load value. @@ -615,8 +624,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb) - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12), - NewDestReg) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRi12), NewDestReg) .addReg(DestReg) .addImm(0); else @@ -673,7 +682,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { return 0; } -bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { +bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(Ty, true); // Only handle simple types. @@ -685,7 +694,7 @@ bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { return TLI.isTypeLegal(VT); } -bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) { +bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; // If this is a type than can be sign or zero-extended to a basic operation @@ -714,7 +723,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { U = C; } - if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) + if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. 
@@ -749,7 +758,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); @@ -946,6 +955,10 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { } bool ARMFastISel::SelectLoad(const Instruction *I) { + // Atomic loads need special handling. + if (cast<LoadInst>(I)->isAtomic()) + return false; + // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) @@ -1008,6 +1021,10 @@ bool ARMFastISel::SelectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; + // Atomic stores need special handling. + if (cast<StoreInst>(I)->isAtomic()) + return false; + // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) @@ -1085,7 +1102,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // TODO: Factor this out. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { MVT SourceVT; - const Type *Ty = CI->getOperand(0)->getType(); + Type *Ty = CI->getOperand(0)->getType(); if (CI->hasOneUse() && (CI->getParent() == I->getParent()) && isTypeLegal(Ty, SourceVT)) { bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); @@ -1201,7 +1218,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); MVT VT; - const Type *Ty = CI->getOperand(0)->getType(); + Type *Ty = CI->getOperand(0)->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1309,7 +1326,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (!Subtarget->hasVFP2()) return false; MVT DstVT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, DstVT)) return false; @@ -1328,7 +1345,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { unsigned Opc; if (Ty->isFloatTy()) Opc = ARM::VSITOS; else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; - else return 0; + else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), @@ -1343,7 +1360,7 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { if (!Subtarget->hasVFP2()) return false; MVT DstVT; - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); if (!isTypeLegal(RetTy, DstVT)) return false; @@ -1351,10 +1368,10 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { if (Op == 0) return false; unsigned Opc; - const Type *OpTy = I->getOperand(0)->getType(); + Type *OpTy = I->getOperand(0)->getType(); if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; - else return 0; + else return false; // f64->s32 or f32->s32 both need an intermediate f32 reg. 
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); @@ -1401,7 +1418,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { bool ARMFastISel::SelectSDiv(const Instruction *I) { MVT VT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1429,7 +1446,7 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) { bool ARMFastISel::SelectSRem(const Instruction *I) { MVT VT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1456,7 +1473,7 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { // operations, but can't figure out how to. Just use the vfp instructions // if we have them. // FIXME: It'd be nice to use NEON instructions. - const Type *Ty = I->getType(); + Type *Ty = I->getType(); bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); if (isFloat && !Subtarget->hasVFP2()) return false; @@ -1711,7 +1728,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext()); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); const Value *RV = Ret->getOperand(0); @@ -1778,7 +1795,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { CallingConv::ID CC = TLI.getLibcallCallingConv(Call); // Handle *simple* calls for now. - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1802,7 +1819,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned Arg = getRegForValue(Op); if (Arg == 0) return false; - const Type *ArgTy = Op->getType(); + Type *ArgTy = Op->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1870,13 +1887,13 @@ bool ARMFastISel::SelectCall(const Instruction *I) { // TODO: Avoid some calling conventions? // Let SDISel handle vararg functions. - const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); if (FTy->isVarArg()) return false; // Handle *simple* calls for now. - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1915,7 +1932,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; - const Type *ArgTy = (*i)->getType(); + Type *ArgTy = (*i)->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1969,9 +1986,9 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. The high bits for a type smaller than // the register size are assumed to be undefined. 
- const Type *DestTy = I->getType(); + Type *DestTy = I->getType(); Value *Op = I->getOperand(0); - const Type *SrcTy = Op->getType(); + Type *SrcTy = Op->getType(); EVT SrcVT, DestVT; SrcVT = TLI.getValueType(SrcTy, true); @@ -2002,16 +2019,18 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i16: + if (!Subtarget->hasV6Ops()) return false; if (isZext) - Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr; + Opc = isThumb ? ARM::t2UXTH : ARM::UXTH; else - Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr; + Opc = isThumb ? ARM::t2SXTH : ARM::SXTH; break; case MVT::i8: + if (!Subtarget->hasV6Ops()) return false; if (isZext) - Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr; + Opc = isThumb ? ARM::t2UXTB : ARM::UXTB; else - Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr; + Opc = isThumb ? ARM::t2SXTB : ARM::SXTB; break; case MVT::i1: if (isZext) { @@ -2033,6 +2052,8 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { .addReg(SrcReg); if (isBoolZext) MIB.addImm(1); + else + MIB.addImm(0); AddOptionalDefs(MIB); UpdateValueMap(I, DestReg); return true; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 381b404..2d1de6f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "ARMFrameLowering.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -93,7 +93,8 @@ static bool isCSRestore(MachineInstr *MI, return false; return true; } - if ((MI->getOpcode() == ARM::LDR_POST || + if ((MI->getOpcode() == ARM::LDR_POST_IMM || + MI->getOpcode() == ARM::LDR_POST_REG || MI->getOpcode() == ARM::t2LDR_POST) && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && MI->getOperand(1).getReg() == ARM::SP) @@ -413,6 +414,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } + + // Add the default predicate in Thumb mode. + if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). @@ -502,7 +506,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, } } } else if (AFI->isThumb2Function()) { - // Use add <rd>, sp, #<imm8> + // Use add <rd>, sp, #<imm8> // ldr <rd>, [sp, #<imm8>] // if at all possible to save space. if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) @@ -587,14 +591,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP).setMIFlags(MIFlags); - // ARM mode needs an extra reg0 here due to addrmode2. Will go away once - // that refactoring is complete (eventually). 
- if (StrOpc == ARM::STR_PRE) { - MIB.addReg(0); - MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); - } else - MIB.addImm(-4); + .addReg(ARM::SP).setMIFlags(MIFlags) + .addImm(-4); AddDefaultPred(MIB); } Regs.clear(); @@ -651,8 +649,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .addReg(ARM::SP)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet) + if (DeleteRet) { + MIB->copyImplicitOps(&*MI); MI->eraseFromParent(); + } MI = MIB; } else if (Regs.size() == 1) { // If we adjusted the reg to PC from LR above, switch it back here. We @@ -665,7 +665,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .addReg(ARM::SP); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). - if (LdrOpc == ARM::LDR_POST) { + if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { MIB.addReg(0); MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else @@ -687,7 +687,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; - unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; + unsigned PushOneOpc = AFI->isThumbFunction() ? + ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, MachineInstr::FrameSetup); @@ -711,7 +712,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST; + unsigned LdrOpc = AFI->isThumbFunction() ? 
ARM::t2LDR_POST :ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp index 8d77b2d..5f863ea 100644 --- a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp @@ -100,8 +100,8 @@ namespace { GlobalCmp(const TargetData *td) : TD(td) { } bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) { - const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); } @@ -123,7 +123,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); - const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; @@ -150,7 +150,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2); + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); } @@ -176,7 +176,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) { // Ignore fancy-aligned globals for now. unsigned Alignment = I->getAlignment(); - const Type *Ty = I->getType()->getElementType(); + Type *Ty = I->getType()->getElementType(); if (Alignment > TD->getABITypeAlignment(Ty)) continue; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2c9481b..5ee009c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMAddressingModes.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); +static cl::opt<bool> +DisableARMIntABS("disable-arm-int-abs", cl::Hidden, + cl::desc("Enable / disable ARM integer abs transform"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. 
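Note (illustrative, not part of the patch): the hunk just above adds the -disable-arm-int-abs escape hatch for a new integer abs transform; the matching code, SelectABSOp, appears later in this file's diff. The transform rests on the branch-free identity abs(x) = (x + y) ^ y with y = x >> 31 (arithmetic shift), i.e. the "Y = sra (X, size(X)-1); xor (add (X, Y), Y)" form quoted in SelectABSOp's comment. A small standalone sketch of that identity:

    // Branch-free integer abs, i.e. the DAG shape
    //   (xor (add X, (sra X, 31)), (sra X, 31))
    // that SelectABSOp recognizes and turns into the ARM::ABS / ARM::t2ABS pseudo.
    // Assumes arithmetic right shift of signed values (true on ARM targets and
    // guaranteed from C++20). INT32_MIN is excluded since abs() overflows there.
    #include <cassert>
    #include <cstdint>
    #include <cstdlib>
    #include <initializer_list>

    static int32_t branchFreeAbs(int32_t x) {
      int32_t y = x >> 31;   // 0 if x >= 0, -1 if x < 0
      return (x + y) ^ y;    // identity when y == 0, two's-complement negation when y == -1
    }

    int main() {
      for (int32_t x : {0, 1, -1, 42, -42, INT32_MAX})
        assert(branchFreeAbs(x) == std::abs(x));
      return 0;
    }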
@@ -90,13 +95,20 @@ public: bool hasNoVMLxHazardUse(SDNode *N) const; bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); - bool SelectShifterOperandReg(SDValue N, SDValue &A, + bool SelectRegShifterOperand(SDValue N, SDValue &A, SDValue &B, SDValue &C, bool CheckProfitability = true); - bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, + bool SelectImmShifterOperand(SDValue N, SDValue &A, + SDValue &B, bool CheckProfitability = true); + bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, SDValue &C) { // Don't apply the profitability check - return SelectShifterOperandReg(N, A, B, C, false); + return SelectRegShifterOperand(N, A, B, C, false); + } + bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, + SDValue &B) { + // Don't apply the profitability check + return SelectImmShifterOperand(N, A, B, false); } bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); @@ -122,8 +134,13 @@ public: return true; } - bool SelectAddrMode2Offset(SDNode *Op, SDValue N, + bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrOffsetNone(SDValue N, SDValue &Base); bool SelectAddrMode3(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, @@ -240,8 +257,13 @@ private: ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); + // Select special operations if node forms integer ABS pattern + SDNode *SelectABSOp(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); + SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -291,10 +313,10 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { /// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax). /// /// \param ScaledConstant [out] - On success, the pre-scaled constant value. -static bool isScaledConstantInRange(SDValue Node, unsigned Scale, +static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant) { - assert(Scale && "Invalid scale!"); + assert(Scale > 0 && "Invalid scale!"); // Check that this is a constant. const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); @@ -365,7 +387,30 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, return ShOpcVal == ARM_AM::lsl && ShAmt == 2; } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, +bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, + SDValue &BaseReg, + SDValue &Opc, + bool CheckProfitability) { + if (DisableShifterOp) + return false; + + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. 
+ if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!RHS) return false; + ShImmVal = RHS->getZExtValue() & 31; + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc, @@ -373,7 +418,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, if (DisableShifterOp) return false; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); // Don't match base register only case. That is matched to a separate // lower complexity pattern with explicit register operand. @@ -381,19 +426,18 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, BaseReg = N.getOperand(0); unsigned ShImmVal = 0; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) - return false; - } + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (RHS) return false; + + ShReg = N.getOperand(1); + if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + return false; Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), MVT::i32); return true; } + bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { @@ -483,13 +527,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, return false; } - if (Subtarget->isCortexA9() && !N.hasOneUse()) - // Compute R +/- (R << N) and reuse it. - return false; - // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + ARM_AM::ShiftOpc ShOpcVal = + ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); unsigned ShAmt = 0; Base = N.getOperand(0); @@ -515,16 +556,14 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { - ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't // fold it. if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { ShAmt = Sh->getZExtValue(); - if (!Subtarget->isCortexA9() || - (N.hasOneUse() && - isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { Offset = N.getOperand(0).getOperand(0); Base = N.getOperand(1); } else { @@ -630,7 +669,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? 
ARM_AM::add:ARM_AM::sub; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + ARM_AM::ShiftOpc ShOpcVal = + ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); unsigned ShAmt = 0; Base = N.getOperand(0); @@ -656,16 +696,14 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { - ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't // fold it. if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { ShAmt = Sh->getZExtValue(); - if (!Subtarget->isCortexA9() || - (N.hasOneUse() && - isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { Offset = N.getOperand(0).getOperand(0); Base = N.getOperand(1); } else { @@ -683,7 +721,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, return AM2_SHOP; } -bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) @@ -692,16 +730,11 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) ? ARM_AM::add : ARM_AM::sub; int Val; - if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. - Offset = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, - ARM_AM::no_shift), - MVT::i32); - return true; - } + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) + return false; Offset = N; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); unsigned ShAmt = 0; if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't fold @@ -724,6 +757,50 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, return true; } +bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. + if (AddSub == ARM_AM::sub) Val *= -1; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(Val, MVT::i32); + return true; + } + + return false; +} + + +bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 
+ Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; + } + + return false; +} + +bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { + Base = N; + return true; +} bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, SDValue &Base, SDValue &Offset, @@ -1079,7 +1156,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, if (DisableShifterOp) return false; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); // Don't match base register only case. That is matched to a separate // lower complexity pattern with explicit register operand. @@ -1208,21 +1285,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return false; } - if (Subtarget->isCortexA9() && !N.hasOneUse()) { - // Compute R + (R << [1,2,3]) and reuse it. - Base = N; - return false; - } - // Look for (R + R) or (R + (R << [1,2,3])). unsigned ShAmt = 0; Base = N.getOperand(0); OffReg = N.getOperand(1); // Swap if it is ((R << c) + R). - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); if (ShOpcVal != ARM_AM::lsl) { - ShOpcVal = ARM_AM::getShiftOpcForNode(Base); + ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); if (ShOpcVal == ARM_AM::lsl) std::swap(Base, OffReg); } @@ -1266,10 +1337,19 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; bool Match = false; - if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { - Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + if (LoadedVT == MVT::i32 && isPre && + SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = ARM::LDR_PRE_IMM; + Match = true; + } else if (LoadedVT == MVT::i32 && !isPre && + SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = ARM::LDR_POST_IMM; Match = true; + } else if (LoadedVT == MVT::i32 && + SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; + Match = true; + } else if (LoadedVT == MVT::i16 && SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; @@ -1283,20 +1363,37 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; } } else { - if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { + if (isPre && + SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = ARM::LDRB_PRE_IMM; + } else if (!isPre && + SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = ARM::LDRB_POST_IMM; + } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { Match = true; - Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + Opcode = isPre ? 
ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; } } } if (Match) { - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), - CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 6); + if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + MVT::i32, MVT::Other, Ops, 5); + } else { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + MVT::i32, MVT::Other, Ops, 6); + } } return NULL; @@ -1966,7 +2063,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); - unsigned Width = CountTrailingOnes_32(And_imm); + // Note: The width operand is encoded as width-1. + unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), @@ -1986,7 +2084,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Srl_imm = 0; if (isInt32Immediate(N->getOperand(1), Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); - unsigned Width = 32 - Srl_imm; + // Note: The width operand is encoded as width-1. + unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) return NULL; @@ -2034,10 +2133,16 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, SDValue CPTmp0; SDValue CPTmp1; SDValue CPTmp2; - if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); + } + + if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); } return 0; } @@ -2198,6 +2303,56 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +/// Target-specific DAG combining for ISD::XOR. +/// Target-independent combining lowers SELECT_CC nodes of the form +/// select_cc setg[ge] X, 0, X, -X +/// select_cc setgt X, -1, X, -X +/// select_cc setl[te] X, 0, -X, X +/// select_cc setlt X, 1, -X, X +/// which represent Integer ABS into: +/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// ARM instruction selection detects the latter and matches it to +/// ARM::ABS or ARM::t2ABS machine node. 
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ + SDValue XORSrc0 = N->getOperand(0); + SDValue XORSrc1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + if (DisableARMIntABS) + return NULL; + + if (Subtarget->isThumb1Only()) + return NULL; + + if (XORSrc0.getOpcode() != ISD::ADD || + XORSrc1.getOpcode() != ISD::SRA) + return NULL; + + SDValue ADDSrc0 = XORSrc0.getOperand(0); + SDValue ADDSrc1 = XORSrc0.getOperand(1); + SDValue SRASrc0 = XORSrc1.getOperand(0); + SDValue SRASrc1 = XORSrc1.getOperand(1); + ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); + EVT XType = SRASrc0.getValueType(); + unsigned Size = XType.getSizeInBits() - 1; + + if (ADDSrc1 == XORSrc1 && + ADDSrc0 == SRASrc0 && + XType.isInteger() && + SRAConstant != NULL && + Size == SRAConstant->getZExtValue()) { + + unsigned Opcode = ARM::ABS; + if (Subtarget->isThumb2()) + Opcode = ARM::t2ABS; + + return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + } + + return NULL; +} + SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. @@ -2207,6 +2362,25 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); } +SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { + SmallVector<SDValue, 6> Ops; + Ops.push_back(Node->getOperand(1)); // Ptr + Ops.push_back(Node->getOperand(2)); // Low part of Val1 + Ops.push_back(Node->getOperand(3)); // High part of Val1 + if (Opc == ARM::ATOMCMPXCHG6432) { + Ops.push_back(Node->getOperand(4)); // Low part of Val2 + Ops.push_back(Node->getOperand(5)); // High part of Val2 + } + Ops.push_back(Node->getOperand(0)); // Chain + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + MVT::i32, MVT::i32, MVT::Other, + Ops.data() ,Ops.size()); + cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); + return ResNode; +} + SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); @@ -2215,6 +2389,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::XOR: { + // Select special operations if XOR node forms integer ABS pattern + SDNode *ResNode = SelectABSOp(N); + if (ResNode) + return ResNode; + // Other cases are autogenerated. + break; + } case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; @@ -2269,8 +2451,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); if (Subtarget->isThumb1Only()) { - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, - CurDAG->getTargetConstant(0, MVT::i32)); + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
ARM::t2ADDri : ARM::ADDri); @@ -2307,7 +2490,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? @@ -2323,7 +2506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); } } } @@ -2986,6 +3169,23 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); + + case ARMISD::ATOMOR64_DAG: + return SelectAtomic64(N, ARM::ATOMOR6432); + case ARMISD::ATOMXOR64_DAG: + return SelectAtomic64(N, ARM::ATOMXOR6432); + case ARMISD::ATOMADD64_DAG: + return SelectAtomic64(N, ARM::ATOMADD6432); + case ARMISD::ATOMSUB64_DAG: + return SelectAtomic64(N, ARM::ATOMSUB6432); + case ARMISD::ATOMNAND64_DAG: + return SelectAtomic64(N, ARM::ATOMNAND6432); + case ARMISD::ATOMAND64_DAG: + return SelectAtomic64(N, ARM::ATOMAND6432); + case ARMISD::ATOMSWAP64_DAG: + return SelectAtomic64(N, ARM::ATOMSWAP6432); + case ARMISD::ATOMCMPXCHG64_DAG: + return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); } return SelectCode(N); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index cf8c5ba..e44e356 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" @@ -24,6 +23,7 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -106,7 +107,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); @@ -178,6 +179,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) RegInfo = TM.getRegisterInfo(); Itins = TM.getInstrItineraryData(); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2()) { @@ -419,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); } + // Use divmod compiler-rt calls for iOS 5.0 and later. 
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS && + !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { + setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); + setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -453,7 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); - setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); + setOperationAction(ISD::SETCC, MVT::v2f64, Expand); setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -485,8 +495,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); - setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + setOperationAction(ISD::SETCC, MVT::v1i64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); @@ -551,6 +561,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + if (!Subtarget->isThumb1Only()) { + // FIXME: We should do this for Thumb1 as well. + setOperationAction(ISD::ADDC, MVT::i32, Custom); + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBC, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); + } + // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Custom); @@ -596,62 +614,46 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. + // FIXME: This should be checking for v6k, not just v6. if (Subtarget->hasDataBarrier() || (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // Custom lowering for 64-bit ops + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. 
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the + // Unordered/Monotonic case. 
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); // Since the libcalls include locking, fold in the fences setShouldFoldAtomicFences(true); } - // 64-bit versions are always libcalls (for now) - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -839,6 +841,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; + case ARMISD::ADDC: return "ARMISD::ADDC"; + case ARMISD::ADDE: return "ARMISD::ADDE"; + case ARMISD::SUBC: return "ARMISD::SUBC"; + case ARMISD::SUBE: return "ARMISD::SUBE"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; @@ -935,6 +942,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { + if (!VT.isVector()) return getPointerTy(); + return VT.changeVectorElementTypeToInteger(); +} + /// getRegClassFor - Return the register class that should be used for the /// specified value type. TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { @@ -1210,8 +1222,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MachineFunction &MF = DAG.getMachineFunction(); bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool IsSibCall = false; - // Temporarily disable tail calls so things don't break. - if (!EnableARMTailCalls) + // Disable tail calls if they're not supported. + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. @@ -1336,10 +1348,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); + // TODO: Disable AlwaysInline when it becomes possible + // to emit a nested call sequence. 
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/false, + /*AlwaysInline=*/true, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -1404,9 +1418,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, - ARMPCLabelIndex, - ARMCP::CPValue, 0); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); + // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1419,8 +1433,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - Sym, ARMPCLabelIndex, 0); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, + ARMPCLabelIndex, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1441,9 +1456,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, - ARMPCLabelIndex, - ARMCP::CPValue, 4); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1470,8 +1484,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - Sym, ARMPCLabelIndex, 4); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, + ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1940,9 +1955,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, } else { unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, - ARMCP::CPBlockAddress, - PCAdj); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, + ARMCP::CPBlockAddress, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); @@ -1966,8 +1981,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, @@ -1982,11 +1997,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), + LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); @@ -2013,8 +2028,9 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, + true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -2030,7 +2046,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, false, false, 0); } else { // local exec model - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -2066,7 +2083,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + ARMConstantPoolConstant::Create(GV, + UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), @@ -2135,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 
0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, + PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -2167,9 +2186,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - "_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, PCAdj); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, PCAdj); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, @@ -2191,7 +2210,8 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Val = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1), Val); } @@ -2224,8 +2244,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, - ARMCP::CPLSDA, PCAdj); + ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, + ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = @@ -2277,6 +2297,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DAG.getConstant(DMBOpt, MVT::i32)); } + +static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // FIXME: handle "fence singlethread" more efficiently. + DebugLoc dl = Op.getDebugLoc(); + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } + + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(ARM_MB::ISH, MVT::i32)); +} + static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // ARM pre v5TE and Thumb1 does not have preload instructions. 
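Side note on the LowerATOMIC_FENCE addition above (illustration only, not part of the patch): on subtargets with data barriers it lowers ISD::ATOMIC_FENCE to ARMISD::MEMBARRIER with the ISH option, i.e. a dmb ish, and on ARM-mode ARMv6 cores without dmb it falls back to the MCR-based barrier. A typical source construct that reaches this path when compiled for an ARMv7-class target is a C++11 fence:

    // Illustration only: a C++11 release fence produces an ISD::ATOMIC_FENCE
    // node, which the LowerATOMIC_FENCE hunk above turns into a full data
    // memory barrier (dmb ish) on subtargets that have one.
    #include <atomic>

    int payload;
    std::atomic<bool> ready{false};

    void publish(int v) {
      payload = v;
      std::atomic_thread_fence(std::memory_order_release);  // -> dmb ish with this lowering
      ready.store(true, std::memory_order_relaxed);
    }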
@@ -2754,7 +2793,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); } ARMCC::CondCodes CondCode, CondCode2; @@ -2993,8 +3032,8 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - EVT OperandVT = Op.getOperand(0).getValueType(); - assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); + assert(Op.getOperand(0).getValueType() == MVT::v4i16 && + "Invalid type for custom lowering!"); if (VT != MVT::v4f32) return DAG.UnrollVectorOp(Op.getNode()); @@ -3905,8 +3944,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Try an immediate VMVN. - uint64_t NegatedImm = (SplatBits.getZExtValue() ^ - ((1LL << SplatBitSize) - 1)); + uint64_t NegatedImm = (~SplatBits).getZExtValue(); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, VmovVT, VT.is128BitVector(), @@ -4019,6 +4057,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); + } else if (V.getOperand(0).getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // This code doesn't know how to handle shuffles where the vector + // element types do not match (this happens because type legalization + // promotes the return type of EXTRACT_VECTOR_ELT). + // FIXME: It might be appropriate to extend this code to handle + // mismatched types. + return SDValue(); } // Record this extraction against the appropriate vector if possible... @@ -4819,6 +4865,71 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { return N0; } +static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getNode()->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + unsigned Opc; + bool ExtraOp = false; + switch (Op.getOpcode()) { + default: assert(0 && "Invalid code"); + case ISD::ADDC: Opc = ARMISD::ADDC; break; + case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; + case ISD::SUBC: Opc = ARMISD::SUBC; break; + case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; + } + + if (!ExtraOp) + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1)); + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1), Op.getOperand(2)); +} + +static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { + // Monotonic load/store is legal for all targets + if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) + return Op; + + // Aquire/Release load/store is not legal for targets without a + // dmb or equivalent available. 
+ return SDValue(); +} + + +static void +ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, + SelectionDAG &DAG, unsigned NewOp) { + DebugLoc dl = Node->getDebugLoc(); + assert (Node->getValueType(0) == MVT::i64 && + "Only know how to expand i64 atomics"); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(Node->getOperand(0)); // Chain + Ops.push_back(Node->getOperand(1)); // Ptr + // Low part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(2), DAG.getIntPtrConstant(0))); + // High part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(2), DAG.getIntPtrConstant(1))); + if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { + // High part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(3), DAG.getIntPtrConstant(0))); + // High part of Val2 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(3), DAG.getIntPtrConstant(1))); + } + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue Result = + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, + cast<MemSDNode>(Node)->getMemOperand()); + SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); + Results.push_back(Result.getValue(2)); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -4834,6 +4945,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -4856,7 +4968,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); - case ISD::VSETCC: return LowerVSETCC(Op, DAG); + case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); @@ -4865,6 +4977,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::UDIV: return LowerUDIV(Op, DAG); + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } return SDValue(); } @@ -4886,6 +5004,30 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; + case ISD::ATOMIC_LOAD_ADD: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); + return; + case ISD::ATOMIC_LOAD_AND: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); + return; + case ISD::ATOMIC_LOAD_NAND: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); + return; + case ISD::ATOMIC_LOAD_OR: + ReplaceATOMIC_OP_64(N, Results, DAG, 
ARMISD::ATOMOR64_DAG); + return; + case ISD::ATOMIC_LOAD_SUB: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); + return; + case ISD::ATOMIC_LOAD_XOR: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); + return; + case ISD::ATOMIC_SWAP: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); + return; + case ISD::ATOMIC_CMP_SWAP: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -4963,7 +5105,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // cmp dest, oldval // bne exitMBB BB = loop1MBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(dest).addReg(oldval)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -4976,8 +5121,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // cmp scratch, #0 // bne loop1MBB BB = loop2MBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5063,7 +5210,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); if (BinOpcode) { // operand order needs to go the other way for NAND if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) @@ -5074,8 +5224,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, addReg(dest).addReg(incr)).addReg(0); } - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5125,12 +5277,12 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, case 1: ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr; + extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr; + extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; @@ -5170,12 +5322,17 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); // Sign extend the value, if necessary. if (signExtend && extendOpc) { oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest)); + AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) + .addReg(dest) + .addImm(0)); } // Build compare and cmov instructions. @@ -5184,8 +5341,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5203,79 +5362,596 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, return BB; } -static -MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I != Succ) - return *I; - llvm_unreachable("Expecting a BB with two successors!"); -} +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Op1, unsigned Op2, + bool NeedsCarry, bool IsCmpxchg) const { + // This also handles ATOMIC_SWAP, indicated by Op1==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); -// FIXME: This opcode table should obviously be expressed in the target -// description. We probably just need a "machine opcode" value in the pseudo -// instruction. But the ideal solution maybe to simply remove the "S" version -// of the opcode altogether. 
-struct AddSubFlagsOpcodePair { - unsigned PseudoOpc; - unsigned MachineOpc; -}; + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; -static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { - {ARM::ADCSri, ARM::ADCri}, - {ARM::ADCSrr, ARM::ADCrr}, - {ARM::ADCSrs, ARM::ADCrs}, - {ARM::SBCSri, ARM::SBCri}, - {ARM::SBCSrr, ARM::SBCrr}, - {ARM::SBCSrs, ARM::SBCrs}, - {ARM::RSBSri, ARM::RSBri}, - {ARM::RSBSrr, ARM::RSBrr}, - {ARM::RSBSrs, ARM::RSBrs}, - {ARM::RSCSri, ARM::RSCri}, - {ARM::RSCSrs, ARM::RSCrs}, - {ARM::t2ADCSri, ARM::t2ADCri}, - {ARM::t2ADCSrr, ARM::t2ADCrr}, - {ARM::t2ADCSrs, ARM::t2ADCrs}, - {ARM::t2SBCSri, ARM::t2SBCri}, - {ARM::t2SBCSrr, ARM::t2SBCrr}, - {ARM::t2SBCSrs, ARM::t2SBCrs}, - {ARM::t2RSBSri, ARM::t2RSBri}, - {ARM::t2RSBSrs, ARM::t2RSBrs}, -}; + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + unsigned vallo = MI->getOperand(3).getReg(); + unsigned valhi = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); -// Convert and Add or Subtract with Carry and Flags to a generic opcode with -// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>). -// -// FIXME: Somewhere we should assert that CPSR<def> is in the correct -// position to be recognized by the target descrition as the 'S' bit. -bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned OldOpc = MI->getOpcode(); - unsigned NewOpc = 0; + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); + MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); + MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); + } - // This is only called for instructions that need remapping, so iterating over - // the tiny opcode table is not costly. - static const int NPairs = - sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); - for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], - *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { - if (OldOpc == Pair->PseudoOpc) { - NewOpc = Pair->MachineOpc; - break; + unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; + unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *contBB = 0, *cont2BB = 0; + if (IsCmpxchg) { + contBB = MF->CreateMachineBasicBlock(LLVM_BB); + cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); + } + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + if (IsCmpxchg) { + MF->insert(It, contBB); + MF->insert(It, cont2BB); + } + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + TargetRegisterClass *TRC = + isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + unsigned storesuccess = MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... 
+ // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrexd r2, r3, ptr + // <binopa> r0, r2, incr + // <binopb> r1, r3, incr + // strexd storesuccess, r0, r1, ptr + // cmp storesuccess, #0 + // bne- loopMBB + // fallthrough --> exitMBB + // + // Note that the registers are explicitly specified because there is not any + // way to force the register allocator to allocate a register pair. + // + // FIXME: The hardcoded registers are not necessary for Thumb2, but we + // need to properly enforce the restriction that the two output registers + // for ldrexd must be different. + BB = loopMBB; + // Load + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(ARM::R2, RegState::Define) + .addReg(ARM::R3, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); + + if (IsCmpxchg) { + // Add early exit + for (unsigned i = 0; i < 2; i++) { + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : + ARM::CMPrr)) + .addReg(i == 0 ? destlo : desthi) + .addReg(i == 0 ? vallo : valhi)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(exitMBB); + BB->addSuccessor(i == 0 ? contBB : cont2BB); + BB = (i == 0 ? contBB : cont2BB); } + + // Copy to physregs for strexd + unsigned setlo = MI->getOperand(5).getReg(); + unsigned sethi = MI->getOperand(6).getReg(); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); + } else if (Op1) { + // Perform binary operation + AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) + .addReg(destlo).addReg(vallo)) + .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) + .addReg(desthi).addReg(valhi)).addReg(0); + } else { + // Copy to physregs for strexd + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); } - if (!NewOpc) - return false; + // Store + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) + .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); + // Cmp+jump + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(storesuccess).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +/// EmitBasePointerRecalculation - For functions using a base pointer, we +/// rematerialize it (via the frame pointer). 
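For reference, a minimal standalone sketch (illustrative only, not part of the patch; the function name is invented) of the source-level operation that EmitAtomicBinary64 above implements for the add case, assuming a C++11 compiler targeting ARMv7:

    #include <atomic>
    #include <cstdint>

    // A 64-bit atomic add: the operation EmitAtomicBinary64 handles for
    // ADD. The ldrexd/strexd retry loop it builds has this shape
    // (memory barriers required for seq_cst not shown):
    //   loop: ldrexd r2, r3, [ptr]
    //         adds   r0, r2, vallo      ; low half, sets carry
    //         adc    r1, r3, valhi      ; high half, consumes carry
    //         strexd success, r0, r1, [ptr]
    //         cmp    success, #0
    //         bne    loop
    uint64_t fetch_add64(std::atomic<uint64_t> &a, uint64_t v) {
      return a.fetch_add(v, std::memory_order_seq_cst);
    }

The hard-coded R0-R3 in the emitted sequence reflect the note and FIXME above: there is currently no register-pair constraint the allocator could use to satisfy the pairing requirement of ldrexd/strexd.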
+void ARMTargetLowering:: +EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + MachineFunction &MF = *MI->getParent()->getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + + if (!RI.hasBasePointer(MF)) return; + + MachineBasicBlock::iterator MBBI = MI; + + int32_t NumBytes = AFI->getFramePtrSpillOffset(); + unsigned FramePtr = RI.getFrameRegister(MF); + assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + "Base pointer without frame pointer?"); + + if (AFI->isThumb2Function()) + llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *AII); + else if (AFI->isThumbFunction()) + llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *AII, RI); + else + llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *AII); + + if (!RI.needsStackRealignment(MF)) return; + + // If there's dynamic realignment, adjust for it. + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + assert(!AFI->isThumb1OnlyFunction()); + + // Emit bic r6, r6, MaxAlign + unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; + AddDefaultCC( + AddDefaultPred( + BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6) + .addReg(ARM::R6, RegState::Kill) + .addImm(MaxAlign - 1))); +} + +/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and +/// registers the function context. +void ARMTargetLowering:: +SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); - for (unsigned i = 0; i < MI->getNumOperands(); ++i) - MIB.addOperand(MI->getOperand(i)); - AddDefaultPred(MIB); - MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + MachineConstantPool *MCP = MF->getConstantPool(); + ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); + const Function *F = MF->getFunction(); + + bool isThumb = Subtarget->isThumb(); + bool isThumb2 = Subtarget->isThumb2(); + + unsigned PCLabelId = AFI->createPICLabelUId(); + unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; + ARMConstantPoolValue *CPV = + ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); + unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); + + const TargetRegisterClass *TRC = + isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + + // Grab constant pool and fixed stack memory operands. + MachineMemOperand *CPMMO = + MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), + MachineMemOperand::MOLoad, 4, 4); + + MachineMemOperand *FIMMOSt = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, 4, 4); + + EmitBasePointerRecalculation(MI, MBB, DispatchBB); + + // Load the address of the dispatch MBB into the jump buffer. 
+ if (isThumb2) { + // Incoming value: jbuf + // ldr.n r5, LCPI1_1 + // orr r5, r5, #1 + // add r5, pc + // str r5, [$jbuf, #+4] ; &jbuf[1] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO)); + // Set the low bit because of thumb mode. + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(0x01))); + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) + .addReg(NewVReg2, RegState::Kill) + .addImm(PCLabelId); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) + .addReg(NewVReg3, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt)); + } else if (isThumb) { + // Incoming value: jbuf + // ldr.n r1, LCPI1_4 + // add r1, pc + // mov r2, #1 + // orrs r1, r2 + // add r2, $jbuf, #+4 ; &jbuf[1] + // str r1, [r2] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO)); + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId); + // Set the low bit because of thumb mode. + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) + .addReg(ARM::CPSR, RegState::Define) + .addImm(1)); + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3, RegState::Kill)); + unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) + .addFrameIndex(FI) + .addImm(36)); // &jbuf[1] :: pc + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg5, RegState::Kill) + .addImm(0) + .addMemOperand(FIMMOSt)); + } else { + // Incoming value: jbuf + // ldr r1, LCPI1_1 + // add r1, pc, r1 + // str r1, [$jbuf, #+4] ; &jbuf[1] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) + .addConstantPoolIndex(CPI) + .addImm(0) + .addMemOperand(CPMMO)); + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId)); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) + .addReg(NewVReg2, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt)); + } +} + +MachineBasicBlock *ARMTargetLowering:: +EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + int FI = MFI->getFunctionContextIndex(); + + const TargetRegisterClass *TRC = + Subtarget->isThumb() ? 
ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. + DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; + unsigned MaxCSNum = 0; + MachineModuleInfo &MMI = MF->getMMI(); + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + if (!BB->isLandingPad()) continue; + + // FIXME: We should assert that the EH_LABEL is the first MI in the landing + // pad. + for (MachineBasicBlock::iterator + II = BB->begin(), IE = BB->end(); II != IE; ++II) { + if (!II->isEHLabel()) continue; + + MCSymbol *Sym = II->getOperand(0).getMCSymbol(); + if (!MMI.hasCallSiteLandingPad(Sym)) continue; + + SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); + for (SmallVectorImpl<unsigned>::iterator + CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); + CSI != CSE; ++CSI) { + CallSiteNumToLPad[*CSI].push_back(BB); + MaxCSNum = std::max(MaxCSNum, *CSI); + } + break; + } + } + + // Get an ordered list of the machine basic blocks for the jump table. + std::vector<MachineBasicBlock*> LPadList; + SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + for (unsigned I = 1; I <= MaxCSNum; ++I) { + SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; + for (SmallVectorImpl<MachineBasicBlock*>::iterator + II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { + LPadList.push_back(*II); + InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // Create the jump table and associated information. + MachineJumpTableInfo *JTI = + MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + unsigned UId = AFI->createJumpTableUId(); + + // Create the MBBs for the dispatch code. + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsLandingPad(); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert and renumber MBBs. + MachineBasicBlock *Last = &MF->back(); + MF->insert(MF->end(), DispatchBB); + MF->insert(MF->end(), DispContBB); + MF->insert(MF->end(), TrapBB); + MF->RenumberBlocks(Last); + + // Insert code into the entry block that creates and registers the function + // context. 
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); + + MachineMemOperand *FIMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad | + MachineMemOperand::MOVolatile, 4, 4); + + if (Subtarget->isThumb2()) { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred( + BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + + BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) + .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg1) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else if (Subtarget->isThumb()) { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) + .addFrameIndex(FI) + .addImm(1) + .addMemOperand(FIMMOLd)); + + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg1) + .addImm(2)); + + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3)); + + MachineMemOperand *JTMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), + MachineMemOperand::MOLoad, 4, 4); + + unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) + .addReg(NewVReg4, RegState::Kill) + .addImm(0) + .addMemOperand(JTMMOLd)); + + unsigned NewVReg6 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3)); + + BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) + .addReg(NewVReg6, RegState::Kill) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred(BuildMI(DispContBB, dl, 
TII->get(ARM::MOVsi), NewVReg2) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + MachineMemOperand *JTMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), + MachineMemOperand::MOLoad, 4, 4); + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred( + BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3) + .addImm(0) + .addMemOperand(JTMMOLd)); + + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId); + } + + // Add the jump table entries as successors to the MBB. + MachineBasicBlock *PrevMBB = 0; + for (std::vector<MachineBasicBlock*>::iterator + I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { + MachineBasicBlock *CurMBB = *I; + if (PrevMBB != CurMBB) + DispContBB->addSuccessor(CurMBB); + PrevMBB = CurMBB; + } + + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + for (SmallPtrSet<MachineBasicBlock*, 64>::iterator + I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { + MachineBasicBlock *BB = *I; + + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + for (MachineBasicBlock::succ_iterator + SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SMBB = *SI; + if (SMBB->isLandingPad()) { + BB->removeSuccessor(SMBB); + SMBB->setIsLandingPad(false); + } + } + + BB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (MachineBasicBlock::reverse_iterator + II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { + if (!II->getDesc().isCall()) continue; + + DenseMap<unsigned, bool> DefRegs; + for (MachineInstr::mop_iterator + OI = II->operands_begin(), OE = II->operands_end(); + OI != OE; ++OI) { + if (!OI->isReg()) continue; + DefRegs[OI->getReg()] = true; + } + + MachineInstrBuilder MIB(&*II); + + for (unsigned i = 0; SavedRegs[i] != 0; ++i) { + if (!TRC->contains(SavedRegs[i])) continue; + if (!DefRegs[SavedRegs[i]]) + MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // The instruction is gone now. MI->eraseFromParent(); - return true; + + return MBB; +} + +static +MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I != Succ) + return *I; + llvm_unreachable("Expecting a BB with two successors!"); } MachineBasicBlock * @@ -5286,12 +5962,61 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { default: { - if (RemapAddSubWithFlags(MI, BB)) - return BB; - MI->dump(); llvm_unreachable("Unexpected instr type to insert"); } + // The Thumb2 pre-indexed stores have the same MI operands, they just + // define them differently in the .td files from the isel patterns, so + // they need pseudos. 
+ case ARM::t2STR_preidx: + MI->setDesc(TII->get(ARM::t2STR_PRE)); + return BB; + case ARM::t2STRB_preidx: + MI->setDesc(TII->get(ARM::t2STRB_PRE)); + return BB; + case ARM::t2STRH_preidx: + MI->setDesc(TII->get(ARM::t2STRH_PRE)); + return BB; + + case ARM::STRi_preidx: + case ARM::STRBi_preidx: { + unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? + ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; + // Decode the offset. + unsigned Offset = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; + Offset = ARM_AM::getAM2Offset(Offset); + if (isSub) + Offset = -Offset; + + MachineMemOperand *MMO = *MI->memoperands_begin(); + BuildMI(*BB, MI, dl, TII->get(NewOpc)) + .addOperand(MI->getOperand(0)) // Rn_wb + .addOperand(MI->getOperand(1)) // Rt + .addOperand(MI->getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .addOperand(MI->getOperand(5)) // pred + .addOperand(MI->getOperand(6)) + .addMemOperand(MMO); + MI->eraseFromParent(); + return BB; + } + case ARM::STRr_preidx: + case ARM::STRBr_preidx: + case ARM::STRH_preidx: { + unsigned NewOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; + case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; + case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; + } + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + MI->eraseFromParent(); + return BB; + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -5370,6 +6095,31 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + + case ARM::ATOMADD6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, + isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, + /*NeedsCarry*/ true); + case ARM::ATOMSUB6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true); + case ARM::ATOMOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, + isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMXOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, + isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMAND6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, + isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMSWAP6432: + return EmitAtomicBinary64(MI, BB, 0, 0, false); + case ARM::ATOMCMPXCHG6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -5461,13 +6211,159 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); - BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2B : ARM::B)) - .addMBB(exitMBB); + if (isThumb2) + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); + else + BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } + + case ARM::ABS: + case ARM::t2ABS: { + // To insert an ABS instruction, we have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // source vreg to test against 0, the destination vreg to set, + // the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + // It transforms + // V1 = ABS V0 + // into + // V2 = MOVS V0 + // BCC (branch to SinkBB if V0 >= 0) + // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) + // SinkBB: V1 = PHI(V2, V3) + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator BBI = BB; + ++BBI; + MachineFunction *Fn = BB->getParent(); + MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); + Fn->insert(BBI, RSBBB); + Fn->insert(BBI, SinkBB); + + unsigned int ABSSrcReg = MI->getOperand(1).getReg(); + unsigned int ABSDstReg = MI->getOperand(0).getReg(); + bool isThumb2 = Subtarget->isThumb2(); + MachineRegisterInfo &MRI = Fn->getRegInfo(); + // In Thumb mode S must not be specified if source register is the SP or + // PC and if destination register is the SP, so restrict register class + unsigned NewMovDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + unsigned NewRsbDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + SinkBB->splice(SinkBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + SinkBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(RSBBB); + BB->addSuccessor(SinkBB); + + // fall through to SinkMBB + RSBBB->addSuccessor(SinkBB); + + // insert a movs at the end of BB + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), + NewMovDstReg) + .addReg(ABSSrcReg, RegState::Kill) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::CPSR, RegState::Define); + + // insert a bcc with opposite CC to ARMCC::MI at the end of BB + BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) + .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); + + // insert rsbri in RSBBB + // Note: BCC and rsbri will be converted into predicated rsbmi + // by if-conversion pass + BuildMI(*RSBBB, RSBBB->begin(), dl, + TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) + .addReg(NewMovDstReg, RegState::Kill) + .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + + // insert PHI in SinkBB, + // reuse ABSDstReg to not change uses of ABS instruction + BuildMI(*SinkBB, SinkBB->begin(), dl, + TII->get(ARM::PHI), ABSDstReg) + .addReg(NewRsbDstReg).addMBB(RSBBB) + .addReg(NewMovDstReg).addMBB(BB); + + // remove ABS instruction + MI->eraseFromParent(); + + // return last added BB + return SinkBB; + } + } +} + +void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.hasPostISelHook()) { + assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && + "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); + return; + } + + // Adjust potentially 's' setting instructions after isel, i.e. 
ADC, SBC, RSB, + // RSC. Coming out of isel, they have an implicit CPSR def, but the optional + // operand is still set to noreg. If needed, set the optional operand's + // register to CPSR, and remove the redundant implicit def. + // + // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>). + + // Rename pseudo opcodes. + unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); + if (NewOpc) { + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); + MI->setDesc(TII->get(NewOpc)); + } + unsigned ccOutIdx = MCID.getNumOperands() - 1; + + // Any ARM instruction that sets the 's' bit should specify an optional + // "cc_out" operand in the last operand position. + if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) { + assert(!NewOpc && "Optional cc_out operand required"); + return; + } + // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it + // since we already have an optional CPSR def. + bool definesCPSR = false; + bool deadCPSR = false; + for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { + definesCPSR = true; + if (MO.isDead()) + deadCPSR = true; + MI->RemoveOperand(i); + break; + } + } + if (!definesCPSR) { + assert(!NewOpc && "Optional cc_out operand required"); + return; + } + assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); + if (deadCPSR) { + assert(!MI->getOperand(ccOutIdx).getReg() && + "expect uninitialized optional cc_out operand"); + return; } + + // If this instruction was defined with an optional CPSR def and its dag node + // had a live implicit CPSR def, then activate the optional CPSR def. + MachineOperand &MO = MI->getOperand(ccOutIdx); + MO.setReg(ARM::CPSR); + MO.setIsDef(true); } //===----------------------------------------------------------------------===// @@ -6975,7 +7871,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { SDValue FalseVal = N->getOperand(0); SDValue TrueVal = N->getOperand(1); SDValue ARMcc = N->getOperand(2); - ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + ARMCC::CondCodes CC = + (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); // Simplify // mov r1, r0 @@ -6995,7 +7892,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // movne r0, y /// FIXME: Turn this into a target neutral optimization? SDValue Res; - if (CC == ARMCC::NE && FalseVal == RHS) { + if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, N->getOperand(3), Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { @@ -7235,7 +8132,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { EVT VT = getValueType(Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -7351,7 +8248,8 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, if (Ptr->getOpcode() == ISD::ADD) { isInc = true; - ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); + ARM_AM::ShiftOpc ShOpcVal= + ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { Base = Ptr->getOperand(1); Offset = Ptr->getOperand(0); @@ -7536,7 +8434,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && IA->getConstraintString().compare(0, 4, "=l,l") == 0) { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (Ty && Ty->getBitWidth() == 32) return IntrinsicLowering::LowerToByteSwap(CI); } @@ -7559,6 +8457,9 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { case 'x': return C_RegisterClass; case 't': return C_RegisterClass; case 'j': return C_Other; // Constant for movw. + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as an 'r' memory constraint. + case 'Q': return C_Memory; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { @@ -7582,7 +8483,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: @@ -7618,7 +8519,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, ARM::GPRRegisterClass); case 'h': // High regs or no regs. if (Subtarget->isThumb()) - return RCPair(0U, ARM::hGPRRegisterClass); + return RCPair(0U, ARM::hGPRRegisterClass); break; case 'r': return RCPair(0U, ARM::GPRRegisterClass); @@ -7632,15 +8533,15 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, break; case 'x': if (VT == MVT::f32) - return RCPair(0U, ARM::SPR_8RegisterClass); + return RCPair(0U, ARM::SPR_8RegisterClass); if (VT.getSizeInBits() == 64) - return RCPair(0U, ARM::DPR_8RegisterClass); + return RCPair(0U, ARM::DPR_8RegisterClass); if (VT.getSizeInBits() == 128) - return RCPair(0U, ARM::QPR_8RegisterClass); + return RCPair(0U, ARM::QPR_8RegisterClass); break; case 't': if (VT == MVT::f32) - return RCPair(0U, ARM::SPRRegisterClass); + return RCPair(0U, ARM::SPRRegisterClass); break; } } @@ -7680,12 +8581,12 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, switch (ConstraintLetter) { case 'j': - // Constant suitable for movw, must be between 0 and - // 65535. - if (Subtarget->hasV6T2Ops()) - if (CVal >= 0 && CVal <= 65535) - break; - return; + // Constant suitable for movw, must be between 0 and + // 65535. 
+ if (Subtarget->hasV6T2Ops()) + if (CVal >= 0 && CVal <= 65535) + break; + return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD @@ -7823,50 +8724,6 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } -int ARM::getVFPf32Imm(const APFloat &FPImm) { - APInt Imm = FPImm.bitcastToAPInt(); - uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; - int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 - int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0x7ffff) - return -1; - Mantissa >>= 19; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -int ARM::getVFPf64Imm(const APFloat &FPImm) { - APInt Imm = FPImm.bitcastToAPInt(); - uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; - int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 - uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0xffffffffffffLL) - return -1; - Mantissa >>= 48; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) return 0; @@ -7889,9 +8746,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!Subtarget->hasVFP3()) return false; if (VT == MVT::f32) - return ARM::getVFPf32Imm(Imm) != -1; + return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64) - return ARM::getVFPf64Imm(Imm) != -1; + return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -7933,7 +8790,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // Conservatively set memVT to the entire set of vectors stored. unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - const Type *ArgTy = I.getArgOperand(ArgI)->getType(); + Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 980fb40..5da9b27 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -71,6 +71,11 @@ namespace llvm { SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. + ADDC, // Add with carry + ADDE, // Add using carry + SUBC, // Sub with carry + SUBE, // Sub using carry + VMOVRRD, // double to two gprs. VMOVDRR, // Two gprs to double. @@ -206,18 +211,22 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD + VST4LN_UPD, + + // 64-bit atomic ops (value split into two registers) + ATOMADD64_DAG, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + ATOMSWAP64_DAG, + ATOMCMPXCHG64_DAG }; } /// Define some predicates that are used for node matching. 
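The getVFPf32Imm/getVFPf64Imm helpers removed above (their replacements are ARM_AM::getFP32Imm/getFP64Imm) test whether a floating-point constant fits the 8-bit VFP "modified immediate" encoding: one sign bit, three exponent bits and four mantissa bits. A self-contained sketch of the f32 check, following the removed code; the function name and the use of memcpy are illustrative, not from the patch:

    #include <cstdint>
    #include <cstring>

    // Returns the 8-bit VMOV.f32 immediate encoding of f, or -1 if the value
    // cannot be materialized that way (mirrors the removed getVFPf32Imm).
    int vfpF32Imm(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      uint32_t sign = (bits >> 31) & 1;
      int32_t  exp  = int32_t((bits >> 23) & 0xff) - 127; // unbiased exponent
      uint32_t mant = bits & 0x7fffff;                    // 23 mantissa bits

      if (mant & 0x7ffff)       // only the top four mantissa bits may be set
        return -1;
      mant >>= 19;
      if (exp < -3 || exp > 4)  // three exponent bits: exp == UInt(NOT(b):c:d)-3
        return -1;
      exp = ((exp + 3) & 0x7) ^ 4;

      return int((sign << 7) | (uint32_t(exp) << 4) | mant);
    }

For example, 1.0f (bit pattern 0x3f800000) encodes as 0x70, while 0.1f is rejected with -1.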
namespace ARM { - /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be - /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) - /// instruction, returns its 8-bit integer representation. Otherwise, - /// returns -1. - int getVFPf32Imm(const APFloat &FPImm); - int getVFPf64Imm(const APFloat &FPImm); bool isBitFieldInvertedMask(unsigned v); } @@ -240,10 +249,16 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + /// getSetCCResultType - Return the value type to use for ISD::SETCC. + virtual EVT getSetCCResultType(EVT VT) const; + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + virtual void + AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const; + SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -256,7 +271,7 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -485,12 +500,28 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode) const; + MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Op1, + unsigned Op2, + bool NeedsCarry = false, + bool IsCmpxchg = false) const; MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB) const; + + void SetupEntryBlockForSjLj(MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const; + + MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI, + MachineBasicBlock *MBB) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 3ccf22f..7cbc911 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -25,7 +25,7 @@ def BrFrm : Format<2>; def BrMiscFrm : Format<3>; def DPFrm : Format<4>; -def DPSoRegFrm : Format<5>; +def DPSoRegRegFrm : Format<5>; def LdFrm : Format<6>; def StFrm : Format<7>; @@ -68,6 +68,7 @@ def N3RegVShFrm : Format<38>; def NVExtFrm : Format<39>; def NVMulSLFrm : Format<40>; def NVTBLFrm : Format<41>; +def DPSoRegImmFrm : Format<42>; // Misc flags. @@ -130,39 +131,15 @@ def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 // ARM special operands. 
// -def CondCodeOperand : AsmOperandClass { - let Name = "CondCode"; - let SuperClasses = []; -} - -def CCOutOperand : AsmOperandClass { - let Name = "CCOut"; - let SuperClasses = []; -} - -def MemBarrierOptOperand : AsmOperandClass { - let Name = "MemBarrierOpt"; - let SuperClasses = []; - let ParserMethod = "tryParseMemBarrierOptOperand"; -} - -def ProcIFlagsOperand : AsmOperandClass { - let Name = "ProcIFlags"; - let SuperClasses = []; - let ParserMethod = "tryParseProcIFlagsOperand"; -} - -def MSRMaskOperand : AsmOperandClass { - let Name = "MSRMask"; - let SuperClasses = []; - let ParserMethod = "tryParseMSRMaskOperand"; -} - // ARM imod and iflag operands, used only by the CPS instruction. def imod_op : Operand<i32> { let PrintMethod = "printCPSIMod"; } +def ProcIFlagsOperand : AsmOperandClass { + let Name = "ProcIFlags"; + let ParserMethod = "parseProcIFlagsOperand"; +} def iflags_op : Operand<i32> { let PrintMethod = "printCPSIFlag"; let ParserMatchClass = ProcIFlagsOperand; @@ -170,17 +147,21 @@ def iflags_op : Operand<i32> { // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). -def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), +def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; } +def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm), (ops (i32 14), (i32 zero_reg))> { let PrintMethod = "printPredicateOperand"; let ParserMatchClass = CondCodeOperand; + let DecoderMethod = "DecodePredicateOperand"; } // Conditional code result for instructions whose 's' bit is set, e.g. subs. +def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; } // Same as cc_out except it defaults to setting CPSR. @@ -188,16 +169,27 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; } // ARM special operands for disassembly only. // +def SetEndAsmOperand : AsmOperandClass { + let Name = "SetEndImm"; + let ParserMethod = "parseSetEndImm"; +} def setend_op : Operand<i32> { let PrintMethod = "printSetendOperand"; + let ParserMatchClass = SetEndAsmOperand; } +def MSRMaskOperand : AsmOperandClass { + let Name = "MSRMask"; + let ParserMethod = "parseMSRMaskOperand"; +} def msr_mask : Operand<i32> { let PrintMethod = "printMSRMaskOperand"; + let DecoderMethod = "DecodeMSRMask"; let ParserMatchClass = MSRMaskOperand; } @@ -211,21 +203,40 @@ def msr_mask : Operand<i32> { // 64 64 - <imm> is encoded in imm6<5:0> def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; + let DecoderMethod = "DecodeShiftRight8Imm"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; + let DecoderMethod = "DecodeShiftRight16Imm"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; + let DecoderMethod = "DecodeShiftRight32Imm"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; + let DecoderMethod = "DecodeShiftRight64Imm"; } //===----------------------------------------------------------------------===// +// ARM Assembler alias templates. 
+// +class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>; +class tInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>; +class t2InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>; +class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; +class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; + +//===----------------------------------------------------------------------===// // ARM Instruction templates. // + class InstTemplate<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : Instruction { @@ -240,17 +251,22 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, Domain D = d; bit isUnaryDataProc = 0; bit canXformTo16Bit = 0; + // The instruction is a 16-bit flag setting Thumb instruction. Used + // by the parser to determine whether to require the 'S' suffix on the + // mnemonic (when not in an IT block) or preclude it (when in an IT block). + bit thumbArithFlagSetting = 0; // If this is a pseudo instruction, mark it isCodeGenOnly. let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); - // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h. + // The layout of TSFlags should be kept in sync with ARMBaseInfo.h. let TSFlags{4-0} = AM.Value; let TSFlags{6-5} = IndexModeBits; let TSFlags{12-7} = Form; let TSFlags{13} = isUnaryDataProc; let TSFlags{14} = canXformTo16Bit; let TSFlags{17-15} = D.Value; + let TSFlags{18} = thumbArithFlagSetting; let Constraints = cstr; let Itinerary = itin; @@ -262,13 +278,17 @@ class Encoding { class InstARM<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> - : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding; + : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding { + let DecoderNamespace = "ARM"; +} // This Encoding-less class is used by Thumb1 to specify the encoding bits later // on by adding flavors to specific instructions. 
class InstThumb<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> - : InstTemplate<am, sz, im, f, d, cstr, itin>; + : InstTemplate<am, sz, im, f, d, cstr, itin> { + let DecoderNamespace = "Thumb"; +} class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, @@ -426,11 +446,11 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { bits<4> Rt; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 1; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; let Inst{11-0} = 0b111110011111; } @@ -450,14 +470,14 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{3-0} = Rt; } class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> - : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> { + : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> { bits<4> Rt; bits<4> Rt2; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00010; let Inst{22} = b; let Inst{21-20} = 0b00; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; @@ -515,22 +535,41 @@ class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops, let Inst{20} = isLd; // L bit let Inst{15-12} = Rt; } -class AI2stridx<bit isByte, bit isPre, dag oops, dag iops, +class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> Rn; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = Rn; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; +} + +class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, pattern> { // AM2 store w/ two operands: (GPR, am2offset) - // {13} 1 == Rm, 0 == imm12 // {12} isAdd // {11-0} imm12/Rm bits<14> offset; bits<4> Rn; - let Inst{25} = offset{13}; + let Inst{25} = 0; let Inst{23} = offset{12}; let Inst{19-16} = Rn; let Inst{11-0} = offset{11-0}; } + + // FIXME: Merge with the above class when addrmode2 gets used for STR, STRB // but for now use this class for STRT and STRBT. 
class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops, @@ -568,9 +607,11 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{7-4} = op; let Inst{3-0} = addr{3-0}; // imm3_0/Rm + + let DecoderMethod = "DecodeAddrMode3Instruction"; } -class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, +class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : I<oops, iops, AddrMode3, 4, im, f, itin, @@ -586,48 +627,24 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, // FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB // but for now use this class for LDRSBT, LDRHT, LDSHT. -class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, +class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, im, f, itin, - opc, asm, cstr, pattern> { + : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> { // {13} 1 == imm8, 0 == Rm // {12-9} Rn // {8} isAdd // {7-4} imm7_4/zero // {3-0} imm3_0/Rm - bits<14> addr; - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = isPre; // P bit - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{20} = op20; // L bit - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Rt; // Rt - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{7-4} = op; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3"; -} - -class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops, - IndexMode im, Format f, InstrItinClass itin, string opc, - string asm, string cstr, list<dag> pattern> - : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, - pattern> { - // AM3 store w/ two operands: (GPR, am3offset) - bits<14> offset; + bits<4> addr; bits<4> Rt; - bits<4> Rn; let Inst{27-25} = 0b000; - let Inst{23} = offset{8}; - let Inst{22} = offset{9}; - let Inst{19-16} = Rn; + let Inst{24} = 0; // P bit + let Inst{21} = 1; + let Inst{20} = isLoad; // L bit + let Inst{19-16} = addr; // Rn let Inst{15-12} = Rt; // Rt - let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{7-4} = op; - let Inst{3-0} = offset{3-0}; // imm3_0/Rm } // stores @@ -648,75 +665,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin, let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{7-4} = op; let Inst{3-0} = addr{3-0}; // imm3_0/Rm -} - -// Pre-indexed stores -class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let 
Inst{27-25} = 0b000; -} - -// Post-indexed stores -class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - // {13} 1 == imm8, 0 == Rm - // {12-9} Rn - // {8} isAdd - // {7-4} imm7_4/zero - // {3-0} imm3_0/Rm - bits<14> addr; - bits<4> Rt; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{15-12} = Rt; // Rt - let Inst{19-16} = addr{12-9}; // Rn - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{23} = addr{8}; // U bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} -class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; + let DecoderMethod = "DecodeAddrMode3Instruction"; } // addrmode4 instructions @@ -843,6 +792,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, } // PKH instructions +def PKHLSLAsmOperand : AsmOperandClass { + let Name = "PKHLSLImm"; + let ParserMethod = "parsePKHLSLImm"; +} +def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{ + let PrintMethod = "printPKHLSLShiftImm"; + let ParserMatchClass = PKHLSLAsmOperand; +} +def PKHASRAsmOperand : AsmOperandClass { + let Name = "PKHASRImm"; + let ParserMethod = "parsePKHASRImm"; +} +def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{ + let PrintMethod = "printPKHASRShiftImm"; + let ParserMatchClass = PKHASRAsmOperand; +} + class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, @@ -850,11 +816,11 @@ class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, bits<4> Rd; bits<4> Rn; bits<4> Rm; - bits<8> sh; + bits<5> sh; let Inst{27-20} = opcod; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; + let Inst{11-7} = sh; let Inst{6} = tb; let Inst{5-4} = 0b01; let Inst{3-0} = Rm; @@ -949,7 +915,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz, let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; + let thumbArithFlagSetting = 1; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "ThumbSBit"; } class T1sI<dag oops, dag iops, InstrItinClass itin, @@ -1071,6 +1039,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } // Same as Thumb2I except it can optionally modify CPSR. 
Note it's modeled as an @@ -1091,6 +1060,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } // Special cases @@ -1103,6 +1073,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } class ThumbXI<dag oops, dag iops, AddrMode am, int sz, @@ -1114,6 +1085,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "Thumb"; } class T2I<dag oops, dag iops, InstrItinClass itin, @@ -1132,8 +1104,8 @@ class T2Ipc<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>; class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "", + string opc, string asm, string cstr, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr, pattern> { bits<4> Rt; bits<4> Rt2; @@ -1149,6 +1121,26 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rt2{3-0}; let Inst{7-0} = addr{7-0}; } +class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, string cstr, + list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr, + pattern> { + bits<4> Rt; + bits<4> Rt2; + bits<4> addr; + bits<9> imm; + let Inst{31-25} = 0b1110100; + let Inst{24} = P; + let Inst{23} = imm{8}; + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = isLoad; + let Inst{19-16} = addr; + let Inst{15-12} = Rt{3-0}; + let Inst{11-8} = Rt2{3-0}; + let Inst{7-0} = imm{7-0}; +} class T2sI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -1172,8 +1164,8 @@ class T2XIt<dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>; -// T2Iidxldst - Thumb2 indexed load / store instructions. -class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, +// T2Ipreldst - Thumb2 pre-indexed load / store instructions. +class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre, dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -1183,25 +1175,60 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; + + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = opcod; let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt{3-0}; let Inst{11} = 1; // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed let Inst{10} = pre; // The P bit. + let Inst{9} = addr{8}; // Sign bit let Inst{8} = 1; // The W bit. + let Inst{7-0} = addr{7-0}; - bits<9> addr; - let Inst{7-0} = addr{7-0}; - let Inst{9} = addr{8}; // Sign bit + let DecoderMethod = "DecodeT2LdStPre"; +} + +// T2Ipostldst - Thumb2 post-indexed load / store instructions. 
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre, + dag oops, dag iops, + AddrMode am, IndexMode im, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; bits<4> Rt; bits<4> Rn; + bits<9> offset; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = load; + let Inst{19-16} = Rn; let Inst{15-12} = Rt{3-0}; - let Inst{19-16} = Rn{3-0}; + let Inst{11} = 1; + // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed + let Inst{10} = pre; // The P bit. + let Inst{9} = offset{8}; // Sign bit + let Inst{8} = 1; // The W bit. + let Inst{7-0} = offset{7-0}; + + let DecoderMethod = "DecodeT2LdStPre"; } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. @@ -1242,6 +1269,7 @@ class VFPI<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; list<Predicate> Predicates = [HasVFP2]; } @@ -1257,6 +1285,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; list<Predicate> Predicates = [HasVFP2]; } @@ -1574,6 +1603,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; } // Same as NeonI except it does not have a "data type" specifier. @@ -1586,6 +1616,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let AsmString = !strconcat(opc, "${p}", "\t", asm); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; } class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, @@ -1600,6 +1631,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, let Inst{7-4} = op7_4; let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder"; + let DecoderNamespace = "NEONLoadStore"; bits<5> Vd; bits<6> Rn; @@ -1643,6 +1675,7 @@ class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, pattern> { let Inst{31-25} = 0b1111001; let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; } class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin, @@ -1651,6 +1684,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin, cstr, pattern> { let Inst{31-25} = 0b1111001; let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; } // NEON "one register and a modified immediate" format. @@ -1677,6 +1711,7 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, let Inst{24} = SIMM{7}; let Inst{18-16} = SIMM{6-4}; let Inst{3-0} = SIMM{3-0}; + let DecoderMethod = "DecodeNEONModImmInstruction"; } // NEON 2 vector register format. 
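[Editorial aside, not part of the patch: the N1ModImm "one register and a modified immediate" class in the hunk above scatters the 8-bit immediate across the word via the quoted let statements (SIMM{7} -> Inst{24}, SIMM{6-4} -> Inst{18-16}, SIMM{3-0} -> Inst{3-0}). A minimal C++ sketch of that bit placement, using a hypothetical helper name rather than any LLVM API:]

#include <cstdint>

// Place an 8-bit NEON "modified immediate" into a 32-bit encoding, mirroring
// the N1ModImm let statements quoted above. Hypothetical helper, not LLVM code.
static uint32_t placeNEONModImm(uint32_t Inst, uint8_t SIMM) {
  Inst |= (uint32_t)((SIMM >> 7) & 0x1) << 24; // SIMM{7}   -> Inst{24}
  Inst |= (uint32_t)((SIMM >> 4) & 0x7) << 16; // SIMM{6-4} -> Inst{18-16}
  Inst |= (uint32_t)(SIMM & 0xF);              // SIMM{3-0} -> Inst{3-0}
  return Inst;
}

[The DecoderMethod added in the same hunk, DecodeNEONModImmInstruction, handles the inverse reassembly on the disassembler side.]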
@@ -1874,6 +1909,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, list<Predicate> Predicates = [HasNEON]; let PostEncoderMethod = "NEONThumb2DupPostEncoder"; + let DecoderNamespace = "NEONDup"; bits<5> V; bits<4> R; @@ -1915,7 +1951,6 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, bits<5> Vd; bits<5> Vm; - bits<4> lane; let Inst{22} = Vd{4}; let Inst{15-12} = Vd{3-0}; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index adcbf18..48da03f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,8 +13,8 @@ #include "ARMInstrInfo.h" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -30,14 +30,18 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; - case ARM::LDR_PRE: - case ARM::LDR_POST: + case ARM::LDR_PRE_IMM: + case ARM::LDR_PRE_REG: + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: return ARM::LDRi12; case ARM::LDRH_PRE: case ARM::LDRH_POST: return ARM::LDRH; - case ARM::LDRB_PRE: - case ARM::LDRB_POST: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: return ARM::LDRBi12; case ARM::LDRSH_PRE: case ARM::LDRSH_POST: @@ -45,14 +49,18 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { case ARM::LDRSB_PRE: case ARM::LDRSB_POST: return ARM::LDRSB; - case ARM::STR_PRE: - case ARM::STR_POST: + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: return ARM::STRi12; case ARM::STRH_PRE: case ARM::STRH_POST: return ARM::STRH; - case ARM::STRB_PRE: - case ARM::STRB_POST: + case ARM::STRB_PRE_IMM: + case ARM::STRB_PRE_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: return ARM::STRBi12; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index a42dd1a..2cf0f09 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -70,6 +70,18 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -120,6 +132,12 @@ def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; +def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; +def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; +def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; + def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; @@ -187,10 +205,16 @@ def IsThumb : Predicate<"Subtarget->isThumb()">, def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate<"ModeThumb,FeatureThumb2">; +def IsMClass : Predicate<"Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass">; +def IsARClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"!FeatureMClass">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">, + AssemblerPredicate<"ModeNaCl">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -263,24 +287,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_65535AsmOperand; } +class BinOpWithFlagFrag<dag res> : + PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; -/// adde and sube predicates - True based on whether the carry flag output -/// will be needed or not. -def adde_dead_carry : - PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), - [{return !N->hasAnyUseOfValue(1);}]>; -def sube_dead_carry : - PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), - [{return !N->hasAnyUseOfValue(1);}]>; -def adde_live_carry : - PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), - [{return N->hasAnyUseOfValue(1);}]>; -def sube_live_carry : - PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), - [{return N->hasAnyUseOfValue(1);}]>; - // An 'and' node with a single use. def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ return N->hasOneUse(); @@ -315,6 +326,7 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeT2BROperand"; } // FIXME: get rid of this one? @@ -345,39 +357,35 @@ def bl_target : Operand<i32> { let OperandType = "OPERAND_PCREL"; } - -// A list of registers separated by comma. Used by load/store multiple. 
-def RegListAsmOperand : AsmOperandClass { - let Name = "RegList"; - let SuperClasses = []; -} - -def DPRRegListAsmOperand : AsmOperandClass { - let Name = "DPRRegList"; - let SuperClasses = []; -} - -def SPRRegListAsmOperand : AsmOperandClass { - let Name = "SPRRegList"; - let SuperClasses = []; +def blx_target : Operand<i32> { + // Encoded the same as branch targets. + let EncoderMethod = "getARMBLXTargetOpValue"; + let OperandType = "OPERAND_PCREL"; } +// A list of registers separated by comma. Used by load/store multiple. +def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; } def reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = RegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeRegListOperand"; } +def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; } def dpr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = DPRRegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeDPRRegListOperand"; } +def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; } def spr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = SPRRegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeSPRRegListOperand"; } // An operand for the CONSTPOOL_ENTRY pseudo-instruction. @@ -397,56 +405,99 @@ def adrlabel : Operand<i32> { def neon_vcvt_imm32 : Operand<i32> { let EncoderMethod = "getNEONVcvtImm32OpValue"; + let DecoderMethod = "DecodeVCVTImmOperand"; } // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm : Operand<i32>, ImmLeaf<i32, [{ - int32_t v = (int32_t)Imm; - return v == 8 || v == 16 || v == 24; }]> { - let EncoderMethod = "getRotImmOpValue"; +def rot_imm_XFORM: SDNodeXForm<imm, [{ + switch (N->getZExtValue()){ + default: assert(0); + case 0: return CurDAG->getTargetConstant(0, MVT::i32); + case 8: return CurDAG->getTargetConstant(1, MVT::i32); + case 16: return CurDAG->getTargetConstant(2, MVT::i32); + case 24: return CurDAG->getTargetConstant(3, MVT::i32); + } +}]>; +def RotImmAsmOperand : AsmOperandClass { + let Name = "RotImm"; + let ParserMethod = "parseRotImm"; } - -def ShifterAsmOperand : AsmOperandClass { - let Name = "Shifter"; - let SuperClasses = []; +def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{ + int32_t v = N->getZExtValue(); + return v == 8 || v == 16 || v == 24; }], + rot_imm_XFORM> { + let PrintMethod = "printRotImmOperand"; + let ParserMatchClass = RotImmAsmOperand; } // shift_imm: An integer that encodes a shift amount and the type of shift -// (currently either asr or lsl) using the same encoding used for the -// immediates in so_reg operands. +// (asr or lsl). The 6-bit immediate encodes as: +// {5} 0 ==> lsl +// 1 asr +// {4-0} imm5 shift amount. +// asr #32 encoded as imm5 == 0. +def ShifterImmAsmOperand : AsmOperandClass { + let Name = "ShifterImm"; + let ParserMethod = "parseShifterImm"; +} def shift_imm : Operand<i32> { let PrintMethod = "printShiftImmOperand"; - let ParserMatchClass = ShifterAsmOperand; + let ParserMatchClass = ShifterImmAsmOperand; } -def ShiftedRegAsmOperand : AsmOperandClass { - let Name = "ShiftedReg"; +// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm. 
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } +def so_reg_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectRegShifterOperand", + [shl, srl, sra, rotr]> { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; + let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm); } -// shifter_operand operands: so_reg and so_imm. -def so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShifterOperandReg", - [shl,srl,sra,rotr]> { - let EncoderMethod = "getSORegOpValue"; - let PrintMethod = "printSORegOperand"; - let ParserMatchClass = ShiftedRegAsmOperand; - let MIOperandInfo = (ops GPR, GPR, shift_imm); +def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; } +def so_reg_imm : Operand<i32>, // reg imm + ComplexPattern<i32, 2, "SelectImmShifterOperand", + [shl, srl, sra, rotr]> { + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// FIXME: Does this need to be distinct from so_reg? +def shift_so_reg_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectShiftRegShifterOperand", + [shl,srl,sra,rotr]> { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); } + // FIXME: Does this need to be distinct from so_reg? -def shift_so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShiftShifterOperandReg", +def shift_so_reg_imm : Operand<i32>, // reg reg imm + ComplexPattern<i32, 2, "SelectShiftImmShifterOperand", [shl,srl,sra,rotr]> { - let EncoderMethod = "getSORegOpValue"; - let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, shift_imm); + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let MIOperandInfo = (ops GPR, i32imm); } + // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. +def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; } def so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { let EncoderMethod = "getSOImmOpValue"; + let ParserMatchClass = SOImmAsmOperand; + let DecoderMethod = "DecodeSOImmOperand"; } // Break so_imm's up into two pieces. This handles immediates with up to 16 @@ -464,7 +515,7 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; -/// imm0_7 predicate - Immediate in the range [0,31]. +/// imm0_7 predicate - Immediate in the range [0,7]. def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; @@ -472,7 +523,7 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_7AsmOperand; } -/// imm0_15 predicate - Immediate in the range [0,31]. +/// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; @@ -481,68 +532,83 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. 
+def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; -}]>; - -/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. -def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 32; }]> { - let EncoderMethod = "getImmMinusOneOpValue"; + let ParserMatchClass = Imm0_31AsmOperand; +} + +/// imm0_255 predicate - Immediate in the range [0,255]. +def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; } +def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { + let ParserMatchClass = Imm0_255AsmOperand; } -// i32imm_hilo16 - For movt/movw - sets the MC Encoder method. -// The imm is split into imm{15-12}, imm{11-0} +// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference +// a relocatable expression. // -def i32imm_hilo16 : Operand<i32> { +// FIXME: This really needs a Thumb version separate from the ARM version. +// While the range is the same, and can thus use the same match class, +// the encoding is different so it should have a different encoder method. +def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; } +def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; + let ParserMatchClass = Imm0_65535ExprAsmOperand; } +/// imm24b - True if the 32-bit immediate is encodable in 24 bits. +def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; } +def imm24b : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm <= 0xffffff; +}]> { + let ParserMatchClass = Imm24bitAsmOperand; +} + + /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff +def BitfieldAsmOperand : AsmOperandClass { + let Name = "Bitfield"; + let ParserMethod = "parseBitfield"; +} def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM::isBitFieldInvertedMask(N->getZExtValue()); }] > { let EncoderMethod = "getBitfieldInvertedMaskOpValue"; let PrintMethod = "printBitfieldInvMaskImmOperand"; + let DecoderMethod = "DecodeBitfieldMaskOperand"; + let ParserMatchClass = BitfieldAsmOperand; } -/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p -def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{ - return isInt<5>(Imm); +def imm1_32_XFORM: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32); }]>; - -/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p -def width_imm : Operand<i32>, ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}] > { - let EncoderMethod = "getMsbOpValue"; -} - -def ssat_imm : Operand<i32>, ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}]> { - let EncoderMethod = "getSsatBitPosValue"; +def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; } +def imm1_32 : Operand<i32>, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; + }], + imm1_32_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_32AsmOperand; +} + +def imm1_16_XFORM: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32); +}]>; +def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; } +def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], + imm1_16_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_16AsmOperand; } // Define ARM specific addressing modes. 
- -def MemMode2AsmOperand : AsmOperandClass { - let Name = "MemMode2"; - let SuperClasses = []; - let ParserMethod = "tryParseMemMode2Operand"; -} - -def MemMode3AsmOperand : AsmOperandClass { - let Name = "MemMode3"; - let SuperClasses = []; - let ParserMethod = "tryParseMemMode3Operand"; -} - // addrmode_imm12 := reg +/- imm12 // +def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } def addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode @@ -551,53 +617,129 @@ def addrmode_imm12 : Operand<i32>, let EncoderMethod = "getAddrModeImm12OpValue"; let PrintMethod = "printAddrModeImm12Operand"; + let DecoderMethod = "DecodeAddrModeImm12Operand"; + let ParserMatchClass = MemImm12OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } // ldst_so_reg := reg +/- reg shop imm // +def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } def ldst_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectLdStSOReg", []> { let EncoderMethod = "getLdStSORegOpValue"; // FIXME: Simplify the printer let PrintMethod = "printAddrMode2Operand"; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); + let DecoderMethod = "DecodeSORegMemOperand"; + let ParserMatchClass = MemRegOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift); +} + +// postidx_imm8 := +/- [0,255] +// +// 9 bit value: +// {8} 1 is imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value. +def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } +def postidx_imm8 : Operand<i32> { + let PrintMethod = "printPostIdxImm8Operand"; + let ParserMatchClass = PostIdxImm8AsmOperand; + let MIOperandInfo = (ops i32imm); } +// postidx_imm8s4 := +/- [0,1020] +// +// 9 bit value: +// {8} 1 is imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value, scaled by 4. +def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } +def postidx_imm8s4 : Operand<i32> { + let PrintMethod = "printPostIdxImm8s4Operand"; + let ParserMatchClass = PostIdxImm8s4AsmOperand; + let MIOperandInfo = (ops i32imm); +} + + +// postidx_reg := +/- reg +// +def PostIdxRegAsmOperand : AsmOperandClass { + let Name = "PostIdxReg"; + let ParserMethod = "parsePostIdxReg"; +} +def postidx_reg : Operand<i32> { + let EncoderMethod = "getPostIdxRegOpValue"; + let DecoderMethod = "DecodePostIdxReg"; + let PrintMethod = "printPostIdxRegOperand"; + let ParserMatchClass = PostIdxRegAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + + // addrmode2 := reg +/- imm12 // := reg +/- reg shop imm // +// FIXME: addrmode2 should be refactored the rest of the way to always +// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). 
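[Editorial aside, not part of the patch: the postidx_imm8 and postidx_imm8s4 operands defined earlier in this hunk are documented as 9-bit values, with bit {8} set when the offset is non-negative and bits {7-0} holding the magnitude, scaled by 4 in the s4 form; the operand defs themselves only declare print/parse hooks. A small C++ sketch of that packing, with hypothetical helper names:]

#include <cassert>
#include <cstdint>

// Pack a signed byte offset into the 9-bit post-indexed immediate form
// described above: bit {8} = add (offset non-negative), bits {7-0} = |offset|.
static uint16_t packPostIdxImm8(int32_t Offset) {
  bool IsAdd = Offset >= 0;
  uint32_t Mag = IsAdd ? (uint32_t)Offset : (uint32_t)-Offset;
  assert(Mag <= 255 && "out of range for postidx_imm8");
  return (uint16_t)((IsAdd ? 0x100u : 0u) | Mag);
}

// postidx_imm8s4 uses the same layout with the magnitude scaled by 4,
// covering +/- [0,1020] in multiples of 4.
static uint16_t packPostIdxImm8s4(int32_t Offset) {
  assert((Offset & 3) == 0 && "postidx_imm8s4 offsets are multiples of 4");
  return packPostIdxImm8(Offset / 4);
}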
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } def addrmode2 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; - let ParserMatchClass = MemMode2AsmOperand; + let ParserMatchClass = AddrMode2AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } -def am2offset : Operand<i32>, - ComplexPattern<i32, 2, "SelectAddrMode2Offset", +def PostIdxRegShiftedAsmOperand : AsmOperandClass { + let Name = "PostIdxRegShifted"; + let ParserMethod = "parsePostIdxReg"; +} +def am2offset_reg : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg", + [], [SDNPWantRoot]> { + let EncoderMethod = "getAddrMode2OffsetOpValue"; + let PrintMethod = "printAddrMode2OffsetOperand"; + // When using this for assembly, it's always as a post-index offset. + let ParserMatchClass = PostIdxRegShiftedAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// FIXME: am2offset_imm should only need the immediate, not the GPR. Having +// the GPR is purely vestigal at this point. +def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } +def am2offset_imm : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; + let ParserMatchClass = AM2OffsetImmAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } + // addrmode3 := reg +/- reg // addrmode3 := reg +/- imm8 // +// FIXME: split into imm vs. reg versions. +def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } def addrmode3 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; - let ParserMatchClass = MemMode3AsmOperand; + let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } +// FIXME: split into imm vs. reg versions. +// FIXME: parser method to handle +/- register. 
+def AM3OffsetAsmOperand : AsmOperandClass { + let Name = "AM3Offset"; + let ParserMethod = "parseAM3Offset"; +} def am3offset : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode3Offset", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode3OffsetOpValue"; let PrintMethod = "printAddrMode3OffsetOperand"; + let ParserMatchClass = AM3OffsetAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } @@ -608,28 +750,28 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { let PrintMethod = "printLdStmModeOperand"; } -def MemMode5AsmOperand : AsmOperandClass { - let Name = "MemMode5"; - let SuperClasses = []; -} - // addrmode5 := reg +/- imm8*4 // +def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } def addrmode5 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode5", []> { let PrintMethod = "printAddrMode5Operand"; - let MIOperandInfo = (ops GPR:$base, i32imm); - let ParserMatchClass = MemMode5AsmOperand; let EncoderMethod = "getAddrMode5OpValue"; + let DecoderMethod = "DecodeAddrMode5Operand"; + let ParserMatchClass = AddrMode5AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); } // addrmode6 := reg with optional alignment // +def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } def addrmode6 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; + let ParserMatchClass = AddrMode6AsmOperand; } def am6offset : Operand<i32>, @@ -638,6 +780,7 @@ def am6offset : Operand<i32>, let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; + let DecoderMethod = "DecodeGPRRegisterClass"; } // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 @@ -666,19 +809,15 @@ def addrmodepc : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } -def MemMode7AsmOperand : AsmOperandClass { - let Name = "MemMode7"; - let SuperClasses = []; -} - -// addrmode7 := reg -// Used by load/store exclusive instructions. Useful to enable right assembly -// parsing and printing. Not used for any codegen matching. 
+// addr_offset_none := reg // -def addrmode7 : Operand<i32> { +def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } +def addr_offset_none : Operand<i32>, + ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> { let PrintMethod = "printAddrMode7Operand"; - let MIOperandInfo = (ops GPR); - let ParserMatchClass = MemMode7AsmOperand; + let DecoderMethod = "DecodeAddrMode7Operand"; + let ParserMatchClass = MemNoOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base); } def nohash_imm : Operand<i32> { @@ -687,25 +826,30 @@ def nohash_imm : Operand<i32> { def CoprocNumAsmOperand : AsmOperandClass { let Name = "CoprocNum"; - let SuperClasses = []; - let ParserMethod = "tryParseCoprocNumOperand"; -} - -def CoprocRegAsmOperand : AsmOperandClass { - let Name = "CoprocReg"; - let SuperClasses = []; - let ParserMethod = "tryParseCoprocRegOperand"; + let ParserMethod = "parseCoprocNumOperand"; } - def p_imm : Operand<i32> { let PrintMethod = "printPImmediate"; let ParserMatchClass = CoprocNumAsmOperand; + let DecoderMethod = "DecodeCoprocessor"; } +def CoprocRegAsmOperand : AsmOperandClass { + let Name = "CoprocReg"; + let ParserMethod = "parseCoprocRegOperand"; +} def c_imm : Operand<i32> { let PrintMethod = "printCImmediate"; let ParserMatchClass = CoprocRegAsmOperand; } +def CoprocOptionAsmOperand : AsmOperandClass { + let Name = "CoprocOption"; + let ParserMethod = "parseCoprocOptionOperand"; +} +def coproc_option_imm : Operand<i32> { + let PrintMethod = "printCoprocOptionImm"; + let ParserMatchClass = CoprocOptionAsmOperand; +} //===----------------------------------------------------------------------===// @@ -748,16 +892,37 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; } - def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } // Assembly aliases for optional destination operand when it's the same @@ -773,56 +938,172 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, - so_reg:$shift, pred:$p, + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + } -/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the -/// instruction modifies the CPSR register. 
-let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass AI1_bin_s_irs<bits<4> opcod, string opc, +/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are +/// reversed. The 'rr' form is only defined for the disassembler; for codegen +/// it is equivalent to the AsI1_bin_irs counterpart. +multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { - def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + PatFrag opnode, string baseOpc, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> { + [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> { bits<4> Rd; bits<4> Rn; bits<12> imm; let Inst{25} = 1; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; let Inst{11-0} = imm; } - def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + [/* pattern left blank */]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; - let isCommutable = Commutable; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; let Inst{25} = 0; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; } - def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> { + [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } + + // Assembly aliases for optional destination operand when it's the same + // as the source operand. + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + +} + +/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default. 
+/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { +multiclass AsI1_rbin_s_is<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; + + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", + [/* pattern left blank */]>; + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>; + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} +} + +/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { +multiclass AsI1_bin_s_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>; + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>; + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>; } } @@ -857,128 +1138,190 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; } - def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis, + def rsi : AI1<opcod, (outs), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, + opc, "\t$Rn, $shift", + [(opnode GPR:$Rn, so_reg_imm:$shift)]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AI1<opcod, (outs), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg:$shift)]> { + [(opnode GPR:$Rn, so_reg_reg:$shift)]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = 0b0000; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } + } } /// AI_ext_rrot - A unary operation 
with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. /// FIXME: Remove the 'r' variant. Its rot_imm is zero. -multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> { - def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iEXTr, opc, "\t$Rd, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { - bits<4> Rd; - bits<4> Rm; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-10} = 0b00; - let Inst{3-0} = Rm; - } - def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot", - [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { - bits<4> Rd; - bits<4> Rm; - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{3-0} = Rm; - } +class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", + [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{3-0} = Rm; } -multiclass AI_ext_rrot_np<bits<8> opcod, string opc> { - def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iEXTr, opc, "\t$Rd, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - let Inst{19-16} = 0b1111; - let Inst{11-10} = 0b00; - } - def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{11-10} = rot; - } +class AI_ext_rrot_np<bits<8> opcod, string opc> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>, + Requires<[IsARM, HasV6]> { + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{11-10} = rot; } /// AI_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> { - def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { +class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", + [(set GPRnopc:$Rd, (opnode GPR:$Rn, + (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{9-4} = 0b000111; + let Inst{3-0} = Rm; +} + +class AI_exta_rrot_np<bits<8> opcod, string opc> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>, + Requires<[IsARM, HasV6]> { + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{11-10} = rot; +} + +/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, + string baseOpc, bit Commutable = 0> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>, + Requires<[IsARM]> { bits<4> Rd; - bits<4> Rm; bits<4> Rn; - let Inst{19-16} = Rn; + bits<12> imm; + let Inst{25} = 1; let Inst{15-12} = Rd; - let Inst{11-10} = 0b00; - let Inst{9-4} = 0b000111; - let Inst{3-0} = Rm; - } - def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, - rot_imm:$rot), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [(set GPR:$Rd, (opnode GPR:$Rn, - (rotr GPR:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>, + Requires<[IsARM]> { bits<4> Rd; + bits<4> Rn; bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; bits<4> Rn; - bits<2> rot; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{9-4} = 0b000111; - let Inst{3-0} = Rm; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; } -} - -// For disassembly only. -multiclass AI_exta_rrot_np<bits<8> opcod, string opc> { - def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - let Inst{11-10} = 0b00; - } - def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, - rot_imm:$rot), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; bits<4> Rn; - bits<2> rot; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; - let Inst{11-10} = rot; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } } + + // Assembly aliases for optional destination operand when it's the same + // as the source operand. 
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; } -/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. -multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { - let Uses = [CPSR] in { +/// AI1_rsc_irs - Define instructions and patterns for rsc +multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode, + string baseOpc> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -990,31 +1333,48 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM]> { + [/* pattern left blank */]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; let Inst{11-4} = 0b00000000; let Inst{25} = 0; - let isCommutable = Commutable; let Inst{3-0} = Rm; let Inst{15-12} = Rd; let Inst{19-16} = Rn; } - def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>, + def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; - let Inst{11-0} = shift; + let Inst{19-16} = Rn; let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } } + // Assembly aliases for optional destination operand when it's the same // as the source operand. 
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), @@ -1028,28 +1388,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, - so_reg:$shift, pred:$p, + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; -} - -// Carry setting variants -// NOTE: CPSR def omitted because it will be handled by the custom inserter. -let usesCustomInserter = 1 in { -multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; - def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - 4, IIC_iALUr, - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { - let isCommutable = Commutable; - } - def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; -} } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -1082,6 +1429,37 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii, } } +let canFoldAsLoad = 1, isReMaterializable = 1 in { +multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii, + InstrItinClass iir, PatFrag opnode> { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. + def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", + [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +} + + multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { // Note: We use the complex addrmode_imm12 rather than just an input @@ -1110,6 +1488,37 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, let Inst{11-0} = shift{11-0}; } } + +multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii, + InstrItinClass iir, PatFrag opnode> { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. 
+ def i12 : AI2ldst<0b010, 0, isByte, (outs), + (ins GPRnopc:$Rt, addrmode_imm12:$addr), + AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", + [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} + + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -1140,42 +1549,66 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", - [/* For disassembly only; pattern left blank */]>, +// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. +// (These pseudos use a hand-written selection code). +let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { +def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, + GPR:$set1, GPR:$set2), + NoItinerary, []>; +} + +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000000; } -def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", - [/* For disassembly only; pattern left blank */]>, +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000001; } -def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", - [/* For disassembly only; pattern left blank */]>, +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000010; } -def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", - [/* For disassembly only; pattern left blank */]>, +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000011; } -def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
DPFrm, NoItinerary, "sel", - "\t$dst, $a, $b", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { +def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", + "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1188,8 +1621,7 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", } def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6T2]> { + []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000100; @@ -1206,14 +1638,11 @@ def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{7-4} = 0b0111; } -// Change Processor State is a system instruction -- for disassembly and -// parsing only. -// FIXME: Since the asm parser has currently no clean way to handle optional -// operands, create 3 versions of the same instruction. Once there's a clean -// framework to represent optional operands, change this behavior. +// Change Processor State +// FIXME: We should use InstAlias to handle the optional operands. class CPS<dag iops, string asm_ops> : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops), - [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> { + []>, Requires<[IsARM]> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -1229,17 +1658,18 @@ class CPS<dag iops, string asm_ops> let Inst{4-0} = mode; } +let DecoderMethod = "DecodeCPSInstruction" in { let M = 1 in - def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), + def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode), "$imod\t$iflags, $mode">; let mode = 0, M = 0 in def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">; let imod = 0, iflags = 0, M = 1 in - def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">; + def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">; +} // Preload signals the memory system of possible future data/instruction access. -// These are for disassembly only. multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, @@ -1271,6 +1701,7 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = 0b1111; let Inst{11-0} = shift{11-0}; + let Inst{4} = 0; } } @@ -1278,10 +1709,8 @@ defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>; defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; -def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM]> { +def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, + "setend\t$end", []>, Requires<[IsARM]> { bits<1> end; let Inst{31-10} = 0b1111000100000001000000; let Inst{9} = end; @@ -1351,14 +1780,17 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. 
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), - MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { + MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> { bits<4> Rd; - bits<12> label; + bits<14> label; let Inst{27-25} = 0b001; + let Inst{24} = 0; + let Inst{23-22} = label{13-12}; + let Inst{21} = 0; let Inst{20} = 0; let Inst{19-16} = 0b1111; let Inst{15-12} = Rd; - let Inst{11-0} = label; + let Inst{11-0} = label{11-0}; } def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; @@ -1424,6 +1856,7 @@ let isCall = 1, let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; } def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), @@ -1432,6 +1865,7 @@ let isCall = 1, Requires<[IsARM, IsNotDarwin]> { bits<24> func; let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; } // ARMv5T and above @@ -1516,6 +1950,7 @@ let isBranch = 1, isTerminator = 1 in { [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { bits<24> target; let Inst{23-0} = target; + let DecoderMethod = "DecodeBranchImmInstruction"; } let isBarrier = 1 in { @@ -1549,9 +1984,9 @@ let isBranch = 1, isTerminator = 1 in { } -// BLX (immediate) -- for disassembly only -def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, - "blx\t$target", [/* pattern left blank */]>, +// BLX (immediate) +def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", []>, Requires<[IsARM, HasV5T]> { let Inst{31-25} = 0b1111101; bits<25> target; @@ -1614,64 +2049,100 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } } - - - - -// Secure Monitor Call is a system instruction -- for disassembly only -def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", - [/* For disassembly only; pattern left blank */]> { +// Secure Monitor Call is a system instruction. +def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []> { bits<4> opt; let Inst{23-4} = 0b01100000000000000111; let Inst{3-0} = opt; } -// Supervisor Call (Software Interrupt) -- for disassembly only +// Supervisor Call (Software Interrupt) let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", - [/* For disassembly only; pattern left blank */]> { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> { bits<24> svc; let Inst{23-0} = svc; } } -// Store Return State is a system instruction -- for disassembly only -let isCodeGenOnly = 1 in { // FIXME: This should not use submode! 
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), - NoItinerary, "srs${amode}\tsp!, $mode", - [/* For disassembly only; pattern left blank */]> { +// Store Return State +class SRSI<bit wb, string asm> + : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<5> mode; let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b110; // W = 1 - let Inst{19-8} = 0xd05; - let Inst{7-5} = 0b000; + let Inst{27-25} = 0b100; + let Inst{22} = 1; + let Inst{21} = wb; + let Inst{20} = 0; + let Inst{19-16} = 0b1101; // SP + let Inst{15-5} = 0b00000101000; + let Inst{4-0} = mode; } -def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), - NoItinerary, "srs${amode}\tsp, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b100; // W = 0 - let Inst{19-8} = 0xd05; - let Inst{7-5} = 0b000; +def SRSDA : SRSI<0, "srsda\tsp, $mode"> { + let Inst{24-23} = 0; +} +def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> { + let Inst{24-23} = 0; +} +def SRSDB : SRSI<0, "srsdb\tsp, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSIA : SRSI<0, "srsia\tsp, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIB : SRSI<0, "srsib\tsp, $mode"> { + let Inst{24-23} = 0b11; +} +def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { + let Inst{24-23} = 0b11; } -// Return From Exception is a system instruction -- for disassembly only -def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), - NoItinerary, "rfe${amode}\t$base!", - [/* For disassembly only; pattern left blank */]> { +// Return From Exception +class RFEI<bit wb, string asm> + : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<4> Rn; let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b011; // W = 1 - let Inst{15-0} = 0x0a00; + let Inst{27-25} = 0b100; + let Inst{22} = 0; + let Inst{21} = wb; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-0} = 0xa00; } -def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), - NoItinerary, "rfe${amode}\t$base", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b001; // W = 0 - let Inst{15-0} = 0x0a00; +def RFEDA : RFEI<0, "rfeda\t$Rn"> { + let Inst{24-23} = 0; +} +def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> { + let Inst{24-23} = 0; +} +def RFEDB : RFEI<0, "rfedb\t$Rn"> { + let Inst{24-23} = 0b10; +} +def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> { + let Inst{24-23} = 0b10; +} +def RFEIA : RFEI<0, "rfeia\t$Rn"> { + let Inst{24-23} = 0b01; +} +def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> { + let Inst{24-23} = 0b01; +} +def RFEIB : RFEI<0, "rfeib\t$Rn"> { + let Inst{24-23} = 0b11; +} +def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { + let Inst{24-23} = 0b11; } -} // isCodeGenOnly = 1 //===----------------------------------------------------------------------===// // Load / store Instructions. 
@@ -1682,16 +2153,16 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, UnOpFrag<(load node:$Src)>>; -defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, +defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, UnOpFrag<(zextloadi8 node:$Src)>>; defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, +defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, - isReMaterializable = 1 in + isReMaterializable = 1, isCodeGenOnly = 1 in def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", []> { @@ -1727,34 +2198,65 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), // Indexed loads multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> { - def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode2:$addr), IndexModePre, LdFrm, itin, + def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 - // {12} isAdd - // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; + bits<17> addr; + let Inst{25} = 0; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{16-13}; + let Inst{11-0} = addr{11-0}; + let DecoderMethod = "DecodeLDRPreImm"; + let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12"; + } + + def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; let Inst{23} = addr{12}; - let Inst{19-16} = addr{17-14}; + let Inst{19-16} = addr{16-13}; let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; + let Inst{4} = 0; + let DecoderMethod = "DecodeLDRPreReg"; + let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2"; } - def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins GPR:$Rn, am2offset:$offset), + + def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), IndexModePost, LdFrm, itin, - opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> { - // {13} 1 == Rm, 0 == imm12 + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; - bits<4> Rn; - let Inst{25} = offset{13}; + bits<4> addr; + let Inst{25} = 0; let Inst{23} = offset{12}; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } + } let mayLoad = 1, neverHasSideEffects = 1 in { @@ -1762,8 +2264,8 @@ defm LDR : 
AI2_ldridx<0, "ldr", IIC_iLoad_ru>; defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>; } -multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> { - def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), +multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { + def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins addrmode3:$addr), IndexModePre, LdMiscFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1773,27 +2275,31 @@ multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; + let DecoderMethod = "DecodeAddrMode3Instruction"; } - def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins GPR:$Rn, am3offset:$offset), IndexModePost, - LdMiscFrm, itin, - opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> { + def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am3offset:$offset), + IndexModePost, LdMiscFrm, itin, + opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + []> { bits<10> offset; - bits<4> Rn; + bits<4> addr; let Inst{23} = offset{8}; // U bit let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; } } let mayLoad = 1, neverHasSideEffects = 1 in { -defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>; -defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>; -defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>; +defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; +defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; +defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; let hasExtraDefRegAllocReq = 1 in { -def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), (ins addrmode3:$addr), IndexModePre, LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", @@ -1804,70 +2310,128 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + let AsmMatchConverter = "cvtLdrdPre"; } -def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins GPR:$Rn, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$Rn], $offset", - "$Rn = $Rn_wb", []> { +def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am3offset:$offset), + IndexModePost, LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr, $offset", + "$addr.base = $Rn_wb", []> { bits<10> offset; - bits<4> Rn; + bits<4> addr; let Inst{23} = offset{8}; // U bit let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; } } // hasExtraDefRegAllocReq = 1 } // mayLoad = 1, neverHasSideEffects = 1 -// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT. 
let mayLoad = 1, neverHasSideEffects = 1 in { -def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru, - "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 +def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; - let Inst{23} = addr{12}; + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let Inst{19-16} = addr{17-14}; - let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; -} -def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; - let Inst{23} = addr{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr{17-14}; - let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; -} -def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + +def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite -} -def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = 
offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +multiclass AI3ldrT<bits<4> op, string opc> { + def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + (ins addr_offset_none:$addr, postidx_imm8:$offset), + IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, + "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + let AsmMatchConverter = "cvtLdExtTWriteBackImm"; + } + def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + (ins addr_offset_none:$addr, postidx_reg:$Rm), + IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, + "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Inst{3-0} = Rm{3-0}; + let AsmMatchConverter = "cvtLdExtTWriteBackReg"; + } } + +defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">; +defm LDRHT : AI3ldrT<0b1011, "ldrht">; +defm LDRSHT : AI3ldrT<0b1111, "ldrsht">; } // Store @@ -1881,98 +2445,302 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, - "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$Rt, $src2, $addr", []>, + Requires<[IsARM, HasV5TE]> { + let Inst{21} = 0; +} // Indexed stores -def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePre, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; - -def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePost, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; - -def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePre, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, - GPR:$Rn, am2offset:$offset))]>; -def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, - GPR:$Rn, am2offset:$offset))]>; - -def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), - IndexModePre, StMiscFrm, IIC_iStore_ru, - "strh", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; - -def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), - IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, - GPR:$Rn, am3offset:$offset))]>; - -// For disassembly only +multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> { + def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre, + StFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let 
Inst{25} = 0; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; // imm12 + let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12"; + let DecoderMethod = "DecodeSTRPreImm"; + } + + def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, ldst_so_reg:$addr), + IndexModePre, StFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; + let Inst{4} = 0; // Inst{4} = 0 + let AsmMatchConverter = "cvtStWriteBackRegAddrMode2"; + let DecoderMethod = "DecodeSTRPreReg"; + } + def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } +} + +let mayStore = 1, neverHasSideEffects = 1 in { +defm STR : AI2_stridx<0, "str", IIC_iStore_ru>; +defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>; +} + +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STR_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; + +// Pseudo-instructions for pattern matching the pre-indexed stores. We can't +// put the patterns on the instruction definitions directly as ISel wants +// the address base and offset to be separate operands, not a single +// complex operand like we represent the instructions themselves. The +// pseudos map between the two. 
+let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { +def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; +} + + + +def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode3:$addr), IndexModePre, + StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "cvtStWriteBackRegAddrMode3"; + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, + addr_offset_none:$addr, + am3offset:$offset))]> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), - (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$src1, $src2, [$base, $offset]!", - "$base = $base_wb", []>; - -// For disassembly only -def STRD_POST: AI3stdpo<(outs GPR:$base_wb), - (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$src1, $src2, [$base], $offset", - "$base = $base_wb", []>; +def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + IndexModePre, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + let AsmMatchConverter = "cvtStrdPre"; +} + +def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr, + am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr, 
$offset", + "$addr.base = $Rn_wb", []> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 -// STRT, STRBT, and STRHT are for disassembly only. +// STRT, STRBT, and STRHT -def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), - IndexModePost, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +let mayStore = 1, neverHasSideEffects = 1 in { +def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} } -def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr), - StMiscFrm, IIC_iStore_bh_ru, - "strht", "\t$Rt, $addr", "$addr.base = $base_wb", - [/* For disassembly only; pattern left blank */]> { - let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode3"; + +multiclass AI3strT<bits<4> op, string opc> { 
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb), + (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc, + "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + let AsmMatchConverter = "cvtStExtTWriteBackImm"; + } + def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb), + (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc, + "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Inst{3-0} = Rm{3-0}; + let AsmMatchConverter = "cvtStExtTWriteBackReg"; + } } + +defm STRHT : AI3strT<0b1011, "strht">; + + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1996,6 +2764,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b01; // Increment After let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2012,6 +2782,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b00; // Decrement After let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2028,6 +2800,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b10; // Decrement Before let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2044,6 +2818,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b11; // Increment Before let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } } @@ -2084,6 +2860,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } +def : ARMInstAlias<"movs${p} $Rd, $Rm", + (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; + // A version for the smaller set of tail call registers. 
let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -2097,15 +2876,33 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, let Inst{15-12} = Rd; } -def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), - DPSoRegFrm, IIC_iMOVsr, - "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>, +def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), + DPSoRegRegFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", + [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP { + bits<4> Rd; + bits<12> src; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-8} = src{11-8}; + let Inst{7} = 0; + let Inst{6-5} = src{6-5}; + let Inst{4} = 1; + let Inst{3-0} = src{3-0}; + let Inst{25} = 0; +} + +def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), + DPSoRegImmFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, UnaryDP { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; let Inst{19-16} = 0b0000; - let Inst{11-0} = src; + let Inst{11-5} = src{11-5}; + let Inst{4} = 0; + let Inst{3-0} = src{3-0}; let Inst{25} = 0; } @@ -2121,7 +2918,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), +def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", [(set GPR:$Rd, imm0_65535:$imm)]>, @@ -2133,16 +2930,22 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), let Inst{19-16} = imm{15-12}; let Inst{20} = 0; let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; } +def : InstAlias<"mov${p} $Rd, $imm", + (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>, + Requires<[IsARM]>; + def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; let Constraints = "$src = $Rd" in { -def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), +def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), + (ins GPR:$src, imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movt", "\t$Rd, $imm", - [(set GPR:$Rd, + [(set GPRnopc:$Rd, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { @@ -2153,6 +2956,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), let Inst{19-16} = imm{15-12}; let Inst{20} = 0; let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; } def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), @@ -2186,30 +2990,28 @@ def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, // Sign extenders -defm SXTB : AI_ext_rrot<0b01101010, +def SXTB : AI_ext_rrot<0b01101010, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm SXTH : AI_ext_rrot<0b01101011, +def SXTH : AI_ext_rrot<0b01101011, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm SXTAB : AI_exta_rrot<0b01101010, +def SXTAB : AI_exta_rrot<0b01101010, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm SXTAH : AI_exta_rrot<0b01101011, +def SXTAH : AI_exta_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -// For disassembly only -defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; -// For disassembly only -defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; // Zero extenders let AddedComplexity = 16 in { 
-defm UXTB : AI_ext_rrot<0b01101110, +def UXTB : AI_ext_rrot<0b01101110, "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm UXTH : AI_ext_rrot<0b01101111, +def UXTH : AI_ext_rrot<0b01101111, "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm UXTB16 : AI_ext_rrot<0b01101100, +def UXTB16 : AI_ext_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. @@ -2217,23 +3019,22 @@ defm UXTB16 : AI_ext_rrot<0b01101100, // instead so we can include a check for masking back in the upper // eight bits of the source into the lower eight bits of the result. //def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), -// (UXTB16r_rot GPR:$Src, 24)>; +// (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16r_rot GPR:$Src, 8)>; + (UXTB16 GPR:$Src, 1)>; -defm UXTAB : AI_exta_rrot<0b01101110, "uxtab", +def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm UXTAH : AI_exta_rrot<0b01101111, "uxtah", +def UXTAH : AI_exta_rrot<0b01101111, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -// For disassembly only -defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; -def SBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), +def SBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -2250,7 +3051,7 @@ def SBFX : I<(outs GPR:$Rd), } def UBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), + (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -2278,148 +3079,58 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; // ADD and SUB with 's' bit set. -defm ADDS : AI1_bin_s_irs<0b0100, "adds", +// +// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the +// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by +// AdjustInstrPostInstrSelection where we determine whether or not to +// set the "s" bit based on CPSR liveness. +// +// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen +// support for an optional CPSR definition that corresponds to the DAG +// node's second value. We can then eliminate the implicit def of CPSR. +defm ADDS : AsI1_bin_s_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; -defm SUBS : AI1_bin_s_irs<0b0010, "subs", + BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AsI1_bin_s_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, "ADC", 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>, + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, "SBC">; -// ADC and SUBC with 's' bit set. 
-let usesCustomInserter = 1 in { -defm ADCS : AI1_adde_sube_s_irs< - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm SBCS : AI1_adde_sube_s_irs< - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; -} - -def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} - -// The reg/reg form is only defined for the disassembler; for codegen it is -// equivalent to SUBrr. -def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} - -def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +defm RSB : AsI1_rbin_irs <0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; -// RSB with 's' bit set. -// NOTE: CPSR def omitted because it will be handled by the custom inserter. -let usesCustomInserter = 1 in { -def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; -def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - 4, IIC_iALUr, - [/* For disassembly only; pattern left blank */]>; -def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; -} - -let Uses = [CPSR] in { -def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -// The reg/reg form is only defined for the disassembler; for codegen it is -// equivalent to SUBrr. -def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} -def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} -} +// FIXME: Eliminate them if we can write def : Pat patterns which defines +// CPSR and the implicit def of CPSR is not needed. +defm RSBS : AsI1_rbin_s_is<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; -// NOTE: CPSR def omitted because it will be handled by the custom inserter. 
-let usesCustomInserter = 1, Uses = [CPSR] in { -def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>; -def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>; -} +defm RSC : AI1_rsc_irs<0b0111, "rsc", + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, + "RSC">; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub // assume opposite meanings of the carry flag (i.e., carry == !borrow). // See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory // details. -def : ARMPat<(add GPR:$src, so_imm_neg:$imm), - (SUBri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), - (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), + (SUBSri GPR:$src, so_imm_neg:$imm)>; + // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), - (SBCri GPR:$src, so_imm_not:$imm)>; -def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), - (SBCSri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), + (SBCri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2427,12 +3138,13 @@ def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) -// ARM Arithmetic Instruction -- for disassembly only +// ARM Arithmetic Instruction // GPR:$dst = GPR:$a op GPR:$b class AAI<bits<8> op27_20, bits<8> op11_4, string opc, - list<dag> pattern = [/* For disassembly only; pattern left blank */], - dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm"> - : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { + list<dag> pattern = [], + dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), + string asm = "\t$Rd, $Rn, $Rm"> + : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { bits<4> Rn; bits<4> Rd; bits<4> Rm; @@ -2443,17 +3155,19 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only +// Saturating add/subtract def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))], - (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">; + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))], - (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn), + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; +def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn), +def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], + (ins GPRnopc:$Rm, GPRnopc:$Rn), 
"\t$Rd, $Rm, $Rn">; def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; @@ -2469,7 +3183,7 @@ def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; -// Signed/Unsigned add/subtract -- for disassembly only +// Signed/Unsigned add/subtract def SASX : AAI<0b01100001, 0b11110011, "sasx">; def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; @@ -2484,7 +3198,7 @@ def USAX : AAI<0b01100101, 0b11110101, "usax">; def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; -// Signed/Unsigned halving add/subtract -- for disassembly only +// Signed/Unsigned halving add/subtract def SHASX : AAI<0b01100011, 0b11110011, "shasx">; def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; @@ -2499,7 +3213,7 @@ def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; -// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only +// Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", @@ -2531,11 +3245,11 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), let Inst{3-0} = Rn; } -// Signed/Unsigned saturate -- for disassembly only +// Signed/Unsigned saturate -def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh), - SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh", - [/* For disassembly only; pattern left blank */]> { +def SSAT : AI<(outs GPRnopc:$Rd), + (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> { bits<4> Rd; bits<5> sat_imm; bits<4> Rn; @@ -2544,14 +3258,14 @@ def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh), let Inst{5-4} = 0b01; let Inst{20-16} = sat_imm; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; - let Inst{6} = sh{0}; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; let Inst{3-0} = Rn; } -def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm, - NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]> { +def SSAT16 : AI<(outs GPRnopc:$Rd), + (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm, + NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> { bits<4> Rd; bits<4> sat_imm; bits<4> Rn; @@ -2562,9 +3276,9 @@ def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm, let Inst{3-0} = Rn; } -def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), - SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh", - [/* For disassembly only; pattern left blank */]> { +def USAT : AI<(outs GPRnopc:$Rd), + (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> { bits<4> Rd; bits<5> sat_imm; bits<4> Rn; @@ -2572,15 +3286,15 @@ def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), let Inst{27-21} = 0b0110111; let Inst{5-4} = 0b01; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; - let Inst{6} = sh{0}; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; let Inst{20-16} = sat_imm; let Inst{3-0} = Rn; } -def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm, - NoItinerary, "usat16", "\t$Rd, $sat_imm, $a", - [/* For disassembly only; pattern left blank */]> { +def USAT16 : AI<(outs GPRnopc:$Rd), + (ins imm0_15:$sat_imm, GPRnopc:$Rn), 
SatFrm, + NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> { bits<4> Rd; bits<4> sat_imm; bits<4> Rn; @@ -2591,8 +3305,10 @@ def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm, let Inst{3-0} = Rn; } -def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; -def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos), + (SSAT imm:$pos, GPRnopc:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), + (USAT imm:$pos, GPRnopc:$a, 0)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -2611,6 +3327,10 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; +// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just +// like in the actual instruction encoding. The complexity of mapping the mask +// to the lsb/msb pair should be handled by ISel, not encapsulated in the +// instruction description. def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "bfc", "\t$Rd, $imm", "$src = $Rd", @@ -2622,16 +3342,16 @@ def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{6-0} = 0b0011111; let Inst{15-12} = Rd; let Inst{11-7} = imm{4-0}; // lsb - let Inst{20-16} = imm{9-5}; // width + let Inst{20-16} = imm{9-5}; // msb } // A8.6.18 BFI - Bitfield insert (Encoding A1) -def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", - [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn, - bf_inv_mask_imm:$imm))]>, - Requires<[IsARM, HasV6T2]> { +def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", + [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn, + bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { bits<4> Rd; bits<4> Rn; bits<10> imm; @@ -2643,25 +3363,6 @@ def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm), let Inst{3-0} = Rn; } -// GNU as only supports this form of bfi (w/ 4 arguments) -let isAsmParserOnly = 1 in -def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, - lsb_pos_imm:$lsb, width_imm:$width), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd", - []>, Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<4> Rn; - bits<5> lsb; - bits<5> width; - let Inst{27-21} = 0b0111110; - let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 - let Inst{15-12} = Rd; - let Inst{11-7} = lsb; - let Inst{20-16} = width; // Custom encoder => lsb+width-1 - let Inst{3-0} = Rn; -} - def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, "mvn", "\t$Rd, $Rm", [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP { @@ -2673,15 +3374,31 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, let Inst{15-12} = Rd; let Inst{3-0} = Rm; } -def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm, - IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP { +def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let 
Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; +} +def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP { bits<4> Rd; bits<12> shift; let Inst{25} = 0; let Inst{19-16} = 0b0000; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, @@ -2820,8 +3537,8 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), bits<4> RdHi; bits<4> Rm; bits<4> Rn; - let Inst{19-16} = RdLo; - let Inst{15-12} = RdHi; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; let Inst{11-8} = Rm; let Inst{3-0} = Rn; } @@ -2855,8 +3572,7 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, + IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b1111; } @@ -2869,8 +3585,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", - [/* For disassembly only; pattern left blank */]>, + IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), @@ -2881,8 +3596,7 @@ def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", - [/* For disassembly only; pattern left blank */]>, + IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; multiclass AI_smul<string opc, PatFrag opnode> { @@ -2925,92 +3639,95 @@ multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> { - def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + let DecoderMethod = "DecodeSMLAInstruction" in { + def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, - (opnode (sext_inreg GPR:$Rn, i16), - (sext_inreg GPR:$Rm, i16))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (opnode (sext_inreg GPRnopc:$Rn, i16), + (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE]>; - def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16), - (sra GPR:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), + (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE]>; - def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)), - 
(sext_inreg GPR:$Rm, i16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE]>; - def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)), - (sra GPR:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE]>; - def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn, - (sext_inreg GPR:$Rm, i16)), (i32 16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, + (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]>; - def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn, - (sra GPR:$Rm, (i32 16))), (i32 16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, + (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]>; + } } defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only -def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +// Halfword multiply accumulate long: SMLAL<x><y>. 
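A minimal sketch, not LLVM code: the arithmetic described by the SMLA<x><y> patterns above and the SMLAL<x><y> definitions that follow. "B" selects the bottom halfword (the sext_inreg ..., i16 operand), "T" the top halfword (the sra ..., 16 operand); the helper and function names here are hypothetical.

#include <cstdint>

static int32_t bot(int32_t r) { return (int16_t)r; }   // sext_inreg r, i16
static int32_t top(int32_t r) { return r >> 16; }      // sra r, 16

// SMLABB: Rd = Ra + bot(Rn) * bot(Rm).  The accumulate wraps; the real
// instruction additionally sets the Q flag on overflow.
int32_t smlabb(int32_t Rn, int32_t Rm, int32_t Ra) {
  return (int32_t)((uint32_t)Ra + (uint32_t)(bot(Rn) * bot(Rm)));
}

// SMLATB: Rd = Ra + top(Rn) * bot(Rm)
int32_t smlatb(int32_t Rn, int32_t Rm, int32_t Ra) {
  return (int32_t)((uint32_t)Ra + (uint32_t)(top(Rn) * bot(Rm)));
}

// SMLALBT: RdHi:RdLo += bot(Rn) * top(Rm)  (64-bit accumulate)
int64_t smlalbt(int32_t Rn, int32_t Rm, int64_t RdHiLo) {
  return RdHiLo + (int64_t)(bot(Rn) * top(Rm));
}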
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -// Helper class for AI_smld -- for disassembly only +// Helper class for AI_smld. class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; - let Inst{4} = 1; - let Inst{5} = swap; - let Inst{6} = sub; - let Inst{7} = 0; - let Inst{21-20} = 0b00; - let Inst{22} = long; let Inst{27-23} = 0b01110; + let Inst{22} = long; + let Inst{21-20} = 0b00; let Inst{11-8} = Rm; + let Inst{7} = 0; + let Inst{6} = sub; + let Inst{5} = swap; + let Inst{4} = 1; let Inst{3-0} = Rn; } class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops, @@ -3024,6 +3741,8 @@ class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> { bits<4> Ra; + bits<4> Rd; + let Inst{19-16} = Rd; let Inst{15-12} = Ra; } class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops, @@ -3037,18 +3756,20 @@ class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops, multiclass AI_smld<bit sub, string opc> { - def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">; - def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">; - def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), NoItinerary, + def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">; - def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), NoItinerary, + def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, !strconcat(opc, 
"ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">; } @@ -3058,10 +3779,10 @@ defm SMLS : AI_smld<1, "smls">; multiclass AI_sdml<bit sub, string opc> { - def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; - def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; + def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; + def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; } defm SMUA : AI_sdml<0, "smua">; @@ -3100,55 +3821,38 @@ def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), (and (srl GPR:$Rm, (i32 8)), 0xFF)), (REVSH GPR:$Rm)>; -def lsl_shift_imm : SDNodeXForm<imm, [{ - unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); - return CurDAG->getTargetConstant(Sh, MVT::i32); -}]>; - -def lsl_amt : ImmLeaf<i32, [{ - return Imm > 0 && Imm < 32; -}], lsl_shift_imm>; - -def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh), +def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh), IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", - [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF), - (and (shl GPR:$Rm, lsl_amt:$sh), - 0xFFFF0000)))]>, + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF), + (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh), + 0xFFFF0000)))]>, Requires<[IsARM, HasV6]>; // Alternate cases for PKHBT where identities eliminate some nodes. -def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)), - (PKHBT GPR:$Rn, GPR:$Rm, 0)>; -def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)), - (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>; - -def asr_shift_imm : SDNodeXForm<imm, [{ - unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue()); - return CurDAG->getTargetConstant(Sh, MVT::i32); -}]>; - -def asr_amt : ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}], asr_shift_imm>; +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. -def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh), +def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh), IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", - [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000), - (and (sra GPR:$Rm, asr_amt:$sh), - 0xFFFF)))]>, + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000), + (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh), + 0xFFFF)))]>, Requires<[IsARM, HasV6]>; // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. 
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)), - (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>; -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)), - (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (srl GPRnopc:$src2, imm16_31:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -3163,8 +3867,10 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm), (CMPri GPR:$src, so_imm:$imm)>; def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), (CMPrr GPR:$src, GPR:$rhs)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs), - (CMPrs GPR:$src, so_reg:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), + (CMPrsi GPR:$src, so_reg_imm:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), + (CMPrsr GPR:$src, so_reg_reg:$rhs)>; // FIXME: We have to be careful when using the CMN instruction and comparison // with 0. One would expect these two pieces of code should give identical @@ -3250,15 +3956,23 @@ def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg:$shift, pred:$p), +def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_imm:$shift, pred:$p), + 4, IIC_iCMOVsr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift, + imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; +def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_reg:$shift, pred:$p), 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, + imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + let isMoveImm = 1 in def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), + (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), 4, IIC_iMOVi, []>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; @@ -3288,9 +4002,14 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), // Atomic operations intrinsics // +def MemBarrierOptOperand : AsmOperandClass { + let Name = "MemBarrierOpt"; + let ParserMethod = "parseMemBarrierOptOperand"; +} def memb_opt : Operand<i32> { let PrintMethod = "printMemBOption"; let ParserMatchClass = MemBarrierOptOperand; + let DecoderMethod = "DecodeMemBarrierOption"; } // memory barriers protect the atomic sequences @@ -3321,8 +4040,16 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +// Pseudo isntruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def ABS : ARMPseudoInst< + (outs GPR:$dst), (ins GPR:$src), + 8, NoItinerary, []>; +} + let usesCustomInserter = 1 in { - let Uses = [CPSR] in { + let Defs = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; @@ -3437,44 +4164,47 @@ let usesCustomInserter = 1 in { } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins 
addrmode7:$addr), NoItinerary, +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexb", "\t$Rt, $addr", []>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, - "ldrexh", "\t$Rt, $addr", []>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, - "ldrex", "\t$Rt, $addr", []>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexh", "\t$Rt, $addr", []>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrex", "\t$Rt, $addr", []>; let hasExtraDefRegAllocReq = 1 in - def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), - NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; +def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; -def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; } let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), - NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; + (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} -// Clear-Exclusive is for disassembly only. -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", - [/* For disassembly only; pattern left blank */]>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } -// SWP/SWPB are deprecated in V6/V7 and for disassembly only. -let mayLoad = 1 in { -def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp", - [/* For disassembly only; pattern left blank */]>; -def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", - [/* For disassembly only; pattern left blank */]>; +// SWP/SWPB are deprecated in V6/V7. 
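A minimal sketch, not the compiler's actual expansion: the ldrex/strex retry loop that supersedes SWP-style atomics on v6/v7, and roughly what the ATOMIC_LOAD_ADD_* pseudos above lower to. GCC-style inline assembly, assuming an ARMv6+ target.

#include <cstdint>

uint32_t atomic_add_fetch32(volatile uint32_t *ptr, uint32_t incr) {
  uint32_t result, failed;
  do {
    __asm__ __volatile__(
        "ldrex  %0, [%2]\n\t"      // exclusive load
        "add    %0, %0, %3\n\t"
        "strex  %1, %0, [%2]\n\t"  // %1 == 0 iff the store succeeded
        : "=&r"(result), "=&r"(failed)
        : "r"(ptr), "r"(incr)
        : "memory");
  } while (failed);                // exclusivity lost; retry
  return result;
}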
+let mayLoad = 1, mayStore = 1 in { +def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), + "swp", []>; +def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), + "swpb", []>; } //===----------------------------------------------------------------------===// @@ -3526,108 +4256,171 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, class ACI<dag oops, dag iops, string opc, string asm, IndexMode im = IndexModeNone> + : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, + opc, asm, "", []> { + let Inst{27-25} = 0b110; +} +class ACInoP<dag oops, dag iops, string opc, string asm, + IndexMode im = IndexModeNone> : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, - opc, asm, "", [/* For disassembly only; pattern left blank */]> { + opc, asm, "", []> { + let Inst{31-28} = 0b1111; let Inst{27-25} = 0b110; } - -multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{ - - def _OFFSET : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; +multiclass LdStCop<bit load, bit Dbit, string asm> { + def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def _PRE : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> { - let Inst{31-28} = op31_28; + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!", IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def _POST : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> { - let Inst{31-28} = op31_28; + def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - def _OPTION : ACI<(outs), - !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option), - ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { - let Inst{31-28} = op31_28; + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; 
let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_OFFSET : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; +} +multiclass LdSt2Cop<bit load, bit Dbit, string asm> { + def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_PRE : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!", - IndexModePre> { - let Inst{31-28} = op31_28; + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!", IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_POST : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr", - IndexModePost> { - let Inst{31-28} = op31_28; + def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_OPTION : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option), - ops), - !strconcat(!strconcat(opc, "l"), cond), - "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { - let Inst{31-28} = op31_28; + def _OPTION : ACInoP<(outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; } } -defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">; -defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">; -defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">; -defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">; +defm LDC : LdStCop <1, 0, "ldc">; +defm LDCL : LdStCop <1, 1, "ldcl">; +defm STC : LdStCop <0, 0, "stc">; +defm STCL : LdStCop <0, 1, "stcl">; 
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l">; +defm STC2 : LdSt2Cop<0, 0, "stc2">; +defm STC2L : LdSt2Cop<0, 1, "stc2l">; //===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register -- for disassembly only +// Move between coprocessor and ARM core register. // class MovRCopro<string opc, bit direction, dag oops, dag iops, @@ -3660,8 +4453,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, imm:$CRm, imm:$opc2)]>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, - i32imm:$opc2), []>; + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3697,15 +4490,14 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, imm:$CRm, imm:$opc2)]>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, - i32imm:$opc2), []>; + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; -class MovRRCopro<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> +class MovRRCopro<string opc, bit direction, list<dag> pattern = []> : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { @@ -3730,8 +4522,7 @@ def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; -class MovRRCopro2<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> +class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { @@ -3758,20 +4549,22 @@ def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; //===----------------------------------------------------------------------===// -// Move between special register and ARM core register -- for disassembly only +// Move between special register and ARM core register // // Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", - [/* For disassembly only; pattern left blank */]> { +def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, apsr", []> { bits<4> Rd; let Inst{23-16} = 0b00001111; let Inst{15-12} = Rd; let Inst{7-4} = 0b0000; } -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", - [/* For disassembly only; pattern left blank */]> { +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>; + +def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, spsr", []> { bits<4> Rd; let Inst{23-16} = 0b01001111; let Inst{15-12} = Rd; @@ -3785,8 +4578,7 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", // operand contains 
the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", - [/* For disassembly only; pattern left blank */]> { + "msr", "\t$mask, $Rn", []> { bits<5> mask; bits<4> Rn; @@ -3800,8 +4592,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, } def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", - [/* For disassembly only; pattern left blank */]> { + "msr", "\t$mask, $a", []> { bits<5> mask; bits<12> a; @@ -4030,6 +4821,47 @@ def : ARMV5TEPat<(add GPR:$acc, def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; +// SXT/UXT with no rotate +let AddedComplexity = 16 in { +def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)), + (UXTAB GPR:$Rn, GPR:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)), + (UXTAH GPR:$Rn, GPR:$Rm, 0)>; +} + +def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>; +def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>; + +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)), + (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)), + (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>; + +// Atomic load/store patterns +def : ARMPat<(atomic_load_8 ldst_so_reg:$src), + (LDRBrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_8 addrmode_imm12:$src), + (LDRBi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_load_16 addrmode3:$src), + (LDRH addrmode3:$src)>; +def : ARMPat<(atomic_load_32 ldst_so_reg:$src), + (LDRrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_32 addrmode_imm12:$src), + (LDRi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val), + (STRBrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val), + (STRBi12 GPR:$val, addrmode_imm12:$ptr)>; +def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val), + (STRH GPR:$val, addrmode3:$ptr)>; +def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val), + (STRrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val), + (STRi12 GPR:$val, addrmode_imm12:$ptr)>; + //===----------------------------------------------------------------------===// // Thumb Support @@ -4070,7 +4902,103 @@ def : MnemonicAlias<"swi", "svc">; // Load / Store Multiple def : MnemonicAlias<"ldmfd", "ldm">; def : MnemonicAlias<"ldmia", "ldm">; +def : MnemonicAlias<"ldmea", "ldmdb">; def : MnemonicAlias<"stmfd", "stmdb">; def : MnemonicAlias<"stmia", "stm">; def : MnemonicAlias<"stmea", "stm">; +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the +// shift amount is zero (i.e., unspecified). +def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + Requires<[IsARM, HasV6]>; +def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + Requires<[IsARM, HasV6]>; + +// PUSH/POP aliases for STM/LDM +def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>; + +// SSAT/USAT optional shift operand. 
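A minimal sketch, not LLVM code, of the clamping SSAT/USAT perform, which is also why the signed form takes imm1_32 and the unsigned form imm0_31 in the definitions earlier in this file; the aliases below only supply the default shift of zero.

#include <cstdint>

int32_t ssat(int32_t x, unsigned n /* 1..32 */) {
  const int64_t hi = (INT64_C(1) << (n - 1)) - 1;   //  2^(n-1) - 1
  const int64_t lo = -(INT64_C(1) << (n - 1));      // -2^(n-1)
  return (int32_t)(x > hi ? hi : x < lo ? lo : x);  // hardware also sets Q on clamp
}

uint32_t usat(int32_t x, unsigned n /* 0..31 */) {
  const int64_t hi = (INT64_C(1) << n) - 1;         // 2^n - 1
  return (uint32_t)(x > hi ? hi : x < 0 ? 0 : x);   // hardware also sets Q on clamp
}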
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn", + (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; +def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn", + (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; + + +// Extend instruction optional rotate operand. +def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb${p} $Rd, $Rm", + (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb16${p} $Rd, $Rm", + (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxth${p} $Rd, $Rm", + (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + +def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb${p} $Rd, $Rm", + (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb16${p} $Rd, $Rm", + (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxth${p} $Rd, $Rm", + (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + + +// RFE aliases +def : MnemonicAlias<"rfefa", "rfeda">; +def : MnemonicAlias<"rfeea", "rfedb">; +def : MnemonicAlias<"rfefd", "rfeia">; +def : MnemonicAlias<"rfeed", "rfeib">; +def : MnemonicAlias<"rfe", "rfeia">; + +// SRS aliases +def : MnemonicAlias<"srsfa", "srsda">; +def : MnemonicAlias<"srsea", "srsdb">; +def : MnemonicAlias<"srsfd", "srsia">; +def : MnemonicAlias<"srsed", "srsib">; +def : MnemonicAlias<"srs", "srsia">; + +// QSAX == QSUBADDX +def : MnemonicAlias<"qsubaddx", "qsax">; +// SASX == SADDSUBX +def : MnemonicAlias<"saddsubx", "sasx">; +// SHASX == SHADDSUBX +def : MnemonicAlias<"shaddsubx", "shasx">; +// SHSAX == SHSUBADDX +def : MnemonicAlias<"shsubaddx", "shsax">; +// SSAX == SSUBADDX +def : MnemonicAlias<"ssubaddx", "ssax">; +// UASX == UADDSUBX +def : MnemonicAlias<"uaddsubx", "uasx">; +// UHASX == UHADDSUBX +def : MnemonicAlias<"uhaddsubx", "uhasx">; +// UHSAX == UHSUBADDX +def : MnemonicAlias<"uhsubaddx", "uhsax">; +// UQASX == UQADDSUBX +def : MnemonicAlias<"uqaddsubx", "uqasx">; +// UQSAX == UQSUBADDX +def : MnemonicAlias<"uqsubaddx", "uqsax">; +// USAX == USUBADDX +def : MnemonicAlias<"usubaddx", "usax">; + +// LDRSBT/LDRHT/LDRSHT post-index offset if optional. +// Note that the write-back output register is a dummy operand for MC (it's +// only meaningful for codegen), so we just pass zero here. +// FIXME: tblgen not cooperating with argument conversions. 
+//def : InstAlias<"ldrsbt${p} $Rt, $addr", +// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>; +//def : InstAlias<"ldrht${p} $Rt, $addr", +// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; +//def : InstAlias<"ldrsht${p} $Rt, $addr", +// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0df62f4..7aad186 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -11,6 +11,35 @@ // //===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific Operands. +//===----------------------------------------------------------------------===// +def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } +def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } +def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 8; +}]> { + let ParserMatchClass = VectorIndex8Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 4; +}]> { + let ParserMatchClass = VectorIndex16Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 2; +}]> { + let ParserMatchClass = VectorIndex32Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. 
//===----------------------------------------------------------------------===// @@ -175,7 +204,8 @@ class VLDQQWBPseudo<InstrItinClass itin> (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; class VLDQQQQPseudo<InstrItinClass itin> - : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">; + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, + "$src = $dst">; class VLDQQQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, @@ -190,6 +220,7 @@ class VLD1D<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), @@ -197,6 +228,7 @@ class VLD1Q<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -221,6 +253,7 @@ class VLD1DWB<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1QWB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), @@ -228,6 +261,7 @@ class VLD1QWB<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; @@ -252,12 +286,14 @@ class VLD1D3<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D3WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; @@ -280,6 +316,7 @@ class VLD1D4<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D4WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, @@ -288,6 +325,7 @@ class VLD1D4WB<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; @@ -310,6 +348,7 @@ class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, @@ -318,6 +357,7 @@ class VLD2Q<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; @@ -343,6 +383,7 @@ class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, @@ -351,6 +392,7 @@ class VLD2QWB<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, 
$dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; @@ -384,6 +426,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -402,6 +445,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -441,6 +485,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -459,6 +504,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -530,6 +576,7 @@ class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, (i32 (LoadOp addrmode6:$Rn)), imm:$lane))]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; } class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> @@ -541,6 +588,7 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, (i32 (LoadOp addrmode6oneL32:$Rn)), imm:$lane))]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; } class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), @@ -580,7 +628,9 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []>; + "$src = $Vd, $Rn.addr = $wb", []> { + let DecoderMethod = "DecodeVLD1LN"; +} def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -607,6 +657,7 @@ class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { @@ -642,6 +693,7 @@ class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { @@ -676,6 +728,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD3LN"; } def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { @@ -712,7 +765,9 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []>; + []> { + let DecoderMethod = "DecodeVLD3LN"; +} def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -748,6 +803,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = 
$dst4", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4LN"; } def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { @@ -788,6 +844,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4LN" ; } def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { @@ -825,6 +882,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> { let Pattern = [(set QPR:$dst, @@ -852,6 +910,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; @@ -864,12 +923,14 @@ class VLD1DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } class VLD1QDUPWB<bits<4> op7_4, string Dt> : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; @@ -891,6 +952,7 @@ class VLD2DUP<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; } def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; @@ -912,6 +974,7 @@ class VLD2DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; } def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; @@ -932,7 +995,8 @@ class VLD3DUP<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn), IIC_VLD3dup, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; @@ -954,7 +1018,8 @@ class VLD3DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; @@ -977,6 +1042,7 @@ class VLD4DUP<bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; @@ -1000,6 +1066,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; @@ -1045,6 +1112,7 @@ class VST1D<bits<4> op7_4, string Dt> IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> 
op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), @@ -1052,6 +1120,7 @@ class VST1Q<bits<4> op7_4, string Dt> "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1075,6 +1144,7 @@ class VST1DWB<bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u, "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), @@ -1082,6 +1152,7 @@ class VST1QWB<bits<4> op7_4, string Dt> IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; @@ -1106,6 +1177,7 @@ class VST1D3<bits<4> op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1D3WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), @@ -1114,6 +1186,7 @@ class VST1D3WB<bits<4> op7_4, string Dt> IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8T : VST1D3<{0,0,0,?}, "8">; @@ -1137,6 +1210,7 @@ class VST1D4<bits<4> op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1D4WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), @@ -1145,6 +1219,7 @@ class VST1D4WB<bits<4> op7_4, string Dt> "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8Q : VST1D4<{0,0,?,?}, "8">; @@ -1167,6 +1242,7 @@ class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs), @@ -1175,6 +1251,7 @@ class VST2Q<bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; @@ -1200,6 +1277,7 @@ class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), @@ -1208,6 +1286,7 @@ class VST2QWB<bits<4> op7_4, string Dt> "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; @@ -1241,6 +1320,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1259,6 +1339,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1298,6 
+1379,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1316,6 +1398,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -1381,6 +1464,7 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVST1LN"; } class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -1389,6 +1473,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ let Rm = 0b1111; + let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : VSTQLNPseudo<IIC_VST1ln> { @@ -1429,7 +1514,9 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]>; + addrmode6:$Rn, am6offset:$Rm))]> { + let DecoderMethod = "DecodeVST1LN"; +} class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : VSTQLNWBPseudo<IIC_VST1lnu> { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), @@ -1465,6 +1552,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { @@ -1502,6 +1590,7 @@ class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", "$addr.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { @@ -1535,6 +1624,7 @@ class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { let Rm = 0b1111; + let DecoderMethod = "DecodeVST3LN"; } def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { @@ -1569,7 +1659,9 @@ class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3lnu, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; + "$Rn.addr = $wb", []> { + let DecoderMethod = "DecodeVST3LN"; +} def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -1604,6 +1696,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { @@ -1642,6 +1735,7 @@ class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { @@ -4039,6 +4133,7 @@ class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, ResTy, OpTy, 
OpNode> { let Inst{21-16} = op21_16; + let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", v8i16, v8i8, NEONvshlli>; @@ -4219,16 +4314,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm", def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; -let neverHasSideEffects = 1 in { -// Pseudo vector move instructions for QQ and QQQQ registers. This should -// be expanded after register allocation is completed. -def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), - NoItinerary, []>; - -def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), - NoItinerary, []>; -} // neverHasSideEffects - // VMOV : Vector Move (Immediate) let isReMaterializable = 1 in { @@ -4462,36 +4547,42 @@ def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, - ValueType Ty> - : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), - IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + ValueType Ty, Operand IdxTy> + : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), + IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy> - : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), - IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + ValueType ResTy, ValueType OpTy, Operand IdxTy> + : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), + IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), - imm:$lane)))]>; + VectorIndex32:$lane)))]>; // Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> { +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { + bits<3> lane; let Inst{19-17} = lane{2-0}; } -def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { + bits<2> lane; let Inst{19-18} = lane{1-0}; } -def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { + bits<1> lane; let Inst{19} = lane{0}; } -def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { + bits<3> lane; let Inst{19-17} = lane{2-0}; } -def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { + bits<2> lane; let Inst{19-18} = lane{1-0}; } -def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { + bits<1> lane; let Inst{19} = lane{0}; } @@ -4753,6 +4844,7 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // Vector Table Lookup and Table Extension. 
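A minimal sketch, not LLVM code, of the byte-wise table lookup described by the VTBL/VTBX definitions below: VTBL writes zero for an out-of-range index, while VTBX leaves the destination byte unchanged.

#include <cstddef>
#include <cstdint>

void vtbl(uint8_t dst[8], const uint8_t *tbl, size_t tbl_len /* 8..32 */,
          const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    dst[i] = (idx[i] < tbl_len) ? tbl[idx[i]] : 0;
}

void vtbx(uint8_t dst[8], const uint8_t *tbl, size_t tbl_len,
          const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    if (idx[i] < tbl_len) dst[i] = tbl[idx[i]];     // else dst[i] is kept
}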
// VTBL : Vector Table Lookup +let DecoderMethod = "DecodeTBLInstruction" in { def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, @@ -4815,6 +4907,7 @@ def VTBX3Pseudo def VTBX4Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX4, "$orig = $dst", []>; +} // DecoderMethod = "DecodeTBLInstruction" //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index bfe83ec..cedb547 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -19,6 +19,19 @@ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def imm_sr_XFORM: SDNodeXForm<imm, [{ + unsigned Imm = N->getZExtValue(); + return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32); +}]>; +def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; } +def imm_sr : Operand<i32>, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; +}], imm_sr_XFORM> { + let PrintMethod = "printThumbSRImm"; + let ParserMatchClass = ThumbSRImmAsmOperand; +} + def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; @@ -30,10 +43,6 @@ def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; } -def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { - let ParserMatchClass = imm0_255_asmoperand; -} def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; @@ -69,8 +78,17 @@ def t_adrlabel : Operand<i32> { } // Scaled 4 immediate. 
-def t_imm_s4 : Operand<i32> { +def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } +def t_imm0_1020s4 : Operand<i32> { + let PrintMethod = "printThumbS4ImmOperand"; + let ParserMatchClass = t_imm0_1020s4_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + +def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; } +def t_imm0_508s4 : Operand<i32> { let PrintMethod = "printThumbS4ImmOperand"; + let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } @@ -79,113 +97,129 @@ def t_imm_s4 : Operand<i32> { let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; + let DecoderMethod = "DecodeThumbBROperand"; } def t_bcctarget : Operand<i32> { let EncoderMethod = "getThumbBCCTargetOpValue"; + let DecoderMethod = "DecodeThumbBCCTargetOperand"; } def t_cbtarget : Operand<i32> { let EncoderMethod = "getThumbCBTargetOpValue"; + let DecoderMethod = "DecodeThumbCmpBROperand"; } def t_bltarget : Operand<i32> { let EncoderMethod = "getThumbBLTargetOpValue"; + let DecoderMethod = "DecodeThumbBLTargetOperand"; } def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; + let DecoderMethod = "DecodeThumbBLXOffset"; } } -def MemModeRegThumbAsmOperand : AsmOperandClass { - let Name = "MemModeRegThumb"; - let SuperClasses = []; -} - -def MemModeImmThumbAsmOperand : AsmOperandClass { - let Name = "MemModeImmThumb"; - let SuperClasses = []; -} - // t_addrmode_rr := reg + reg // +def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; } def t_addrmode_rr : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; + let DecoderMethod = "DecodeThumbAddrModeRR"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } // t_addrmode_rrs := reg + reg // +// We use separate scaled versions because the Select* functions need +// to explicitly check for a matching constant and return false here so that +// the reg+imm forms will match instead. This is a horrible way to do that, +// as it forces tight coupling between the methods, but it's how selectiondag +// currently works. 
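To make the coupling described in that comment concrete, here is a rough sketch (my illustration, not the actual SelectThumbAddrModeRI5S* code; the function name and exact signature are assumptions) of what a reg+reg selector has to do so that the reg+imm forms win whenever the offset is a constant:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical reg+reg address selector: refuse constant offsets so the
// scaled reg+imm addressing modes (t_addrmode_is1/is2/is4) match instead.
static bool selectThumbAddrModeRegRegSketch(SDValue N, SDValue &Base,
                                            SDValue &OffReg) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  if (isa<ConstantSDNode>(N.getOperand(1)))
    return false;                 // let the reg+imm pattern handle it
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);
  return true;
}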
def t_addrmode_rrs1 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; + let DecoderMethod = "DecodeThumbAddrModeRR"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } def t_addrmode_rrs2 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let DecoderMethod = "DecodeThumbAddrModeRR"; let PrintMethod = "printThumbAddrModeRROperand"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } def t_addrmode_rrs4 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let DecoderMethod = "DecodeThumbAddrModeRR"; let PrintMethod = "printThumbAddrModeRROperand"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } // t_addrmode_is4 := reg + imm5 * 4 // +def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; } def t_addrmode_is4 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S4Operand"; + let ParserMatchClass = t_addrmode_is4_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_is2 := reg + imm5 * 2 // +def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; } def t_addrmode_is2 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S2Operand"; + let ParserMatchClass = t_addrmode_is2_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_is1 := reg + imm5 // +def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; } def t_addrmode_is1 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S1Operand"; + let ParserMatchClass = t_addrmode_is1_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_sp := sp + imm8 * 4 // +// FIXME: This really shouldn't have an explicit SP operand at all. It should +// be implicit, just like in the instruction encoding itself. 
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; } def t_addrmode_sp : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let EncoderMethod = "getAddrModeThumbSPOpValue"; + let DecoderMethod = "DecodeThumbAddrModeSP"; let PrintMethod = "printThumbAddrModeSPOperand"; + let ParserMatchClass = t_addrmode_sp_asm_operand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_pc := <label> => pc + imm8 * 4 // def t_addrmode_pc : Operand<i32> { let EncoderMethod = "getAddrModePCOpValue"; - let ParserMatchClass = MemModeImmThumbAsmOperand; + let DecoderMethod = "DecodeThumbAddrModePC"; } //===----------------------------------------------------------------------===// @@ -207,68 +241,52 @@ def tADJCALLSTACKDOWN : Requires<[IsThumb, IsThumb1Only]>; } -// T1Disassembly - A simple class to make encoding some disassembly patterns -// easier and less verbose. -class T1Disassembly<bits<2> op1, bits<8> op2> +class T1SystemEncoding<bits<8> opc> : T1Encoding<0b101111> { - let Inst{9-8} = op1; - let Inst{7-0} = op2; + let Inst{9-8} = 0b11; + let Inst{7-0} = opc; } -def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x00>; // A8.6.110 +def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>, + T1SystemEncoding<0x00>, // A8.6.110 + Requires<[IsThumb2]>; -def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x10>; // A8.6.410 +def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>, + T1SystemEncoding<0x10>; // A8.6.410 -def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x20>; // A8.6.408 +def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>, + T1SystemEncoding<0x20>; // A8.6.408 -def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x30>; // A8.6.409 +def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>, + T1SystemEncoding<0x30>; // A8.6.409 -def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x40>; // A8.6.157 +def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>, + T1SystemEncoding<0x40>; // A8.6.157 -// The i32imm operand $val can be used by a debugger to store more information +// The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> { +def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", + []>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; // A8.6.22 bits<8> val; let Inst{7-0} = val; } -def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", - [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101101> { - // A8.6.156 - let Inst{9-5} = 0b10010; - let Inst{4} = 1; - let Inst{3} = 1; // Big-Endian - let Inst{2-0} = 0b000; -} - -def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", - [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101101> { +def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", + []>, T1Encoding<0b101101> { + bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; let Inst{4} = 1; - let Inst{3} = 0; // Little-Endian + let Inst{3} = end; let Inst{2-0} = 0b000; } // Change Processor State is a system instruction -- for disassembly only. def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), - NoItinerary, "cps$imod $iflags", - [/* For disassembly only; pattern left blank */]>, + NoItinerary, "cps$imod $iflags", []>, T1Misc<0b0110011> { // A8.6.38 & B6.1.1 bit imod; @@ -277,6 +295,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), let Inst{4} = imod; let Inst{3} = 0; let Inst{2-0} = iflags; + let DecoderMethod = "DecodeThumbCPS"; } // For both thumb1 and thumb2. @@ -290,70 +309,70 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", let Inst{2-0} = dst; } -// PC relative add (ADR). -def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs", []>, - T1Encoding<{1,0,1,0,0,?}> { - // A6.2 & A8.6.10 - bits<3> dst; - bits<8> rhs; - let Inst{10-8} = dst; - let Inst{7-0} = rhs; -} - // ADD <Rd>, sp, #<imm8> -// This is rematerializable, which is particularly useful for taking the -// address of locals. -let isReMaterializable = 1 in -def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs", []>, +// FIXME: This should not be marked as having side effects, and it should be +// rematerializable. Clearing the side effect bit causes miscompilations, +// probably because the instruction can be moved around. +def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), + IIC_iALUi, "add", "\t$dst, $sp, $imm", []>, T1Encoding<{1,0,1,0,1,?}> { // A6.2 & A8.6.8 bits<3> dst; - bits<8> rhs; + bits<8> imm; let Inst{10-8} = dst; - let Inst{7-0} = rhs; + let Inst{7-0} = imm; + let DecoderMethod = "DecodeThumbAddSpecialReg"; } // ADD sp, sp, #<imm7> -def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $rhs", []>, +def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), + IIC_iALUi, "add", "\t$Rdn, $imm", []>, T1Misc<{0,0,0,0,0,?,?}> { // A6.2.5 & A8.6.8 - bits<7> rhs; - let Inst{6-0} = rhs; + bits<7> imm; + let Inst{6-0} = imm; + let DecoderMethod = "DecodeThumbAddSPImm"; } // SUB sp, sp, #<imm7> // FIXME: The encoding and the ASM string don't match up. 
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "sub\t$dst, $rhs", []>, +def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), + IIC_iALUi, "sub", "\t$Rdn, $imm", []>, T1Misc<{0,0,0,0,1,?,?}> { // A6.2.5 & A8.6.214 - bits<7> rhs; - let Inst{6-0} = rhs; + bits<7> imm; + let Inst{6-0} = imm; + let DecoderMethod = "DecodeThumbAddSPImm"; } +// Can optionally specify SP as a three operand instruction. +def : tInstAlias<"add${p} sp, sp, $imm", + (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; +def : tInstAlias<"sub${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>; + // ADD <Rm>, sp -def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>, +def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr, + "add", "\t$Rdn, $sp, $Rn", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T1 - bits<4> dst; - let Inst{7} = dst{3}; + bits<4> Rdn; + let Inst{7} = Rdn{3}; let Inst{6-3} = 0b1101; - let Inst{2-0} = dst{2-0}; + let Inst{2-0} = Rdn{2-0}; + let DecoderMethod = "DecodeThumbAddSPReg"; } // ADD sp, <Rm> -def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>, +def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, + "add", "\t$Rdn, $Rm", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T2 - bits<4> dst; + bits<4> Rm; let Inst{7} = 1; - let Inst{6-3} = dst; + let Inst{6-3} = Rm; let Inst{2-0} = 0b101; + let DecoderMethod = "DecodeThumbAddSPReg"; } //===----------------------------------------------------------------------===// @@ -390,11 +409,12 @@ let isCall = 1, Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins t_bltarget:$func, variable_ops), IIC_Br, - "bl\t$func", + (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, + "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]> { - bits<21> func; + bits<22> func; + let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; let Inst{13} = 1; let Inst{11} = 1; @@ -403,8 +423,8 @@ let isCall = 1, // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br, - "blx\t$func", + (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, + "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]> { bits<21> func; @@ -416,8 +436,8 @@ let isCall = 1, } // Also used for Thumb2 - def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx\t$func", + def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, + "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; @@ -440,43 +460,22 @@ let isCall = 1, Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], Uses = [R7, SP] in { // Also used for Thumb2 - def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins pred:$p, t_bltarget:$func, variable_ops), - IIC_Br, "bl${p}\t$func", - [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsDarwin]> { - bits<21> func; - let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; - let Inst{10-0} = func{10-0}; - } + def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), + 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], + (tBL pred:$p, t_bltarget:$func)>, + Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins pred:$p, 
t_blxtarget:$func, variable_ops), - IIC_Br, "blx${p}\t$func", - [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]> { - bits<21> func; - let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; - let Inst{10-1} = func{10-1}; - let Inst{0} = 0; // func{0} is assumed zero - } + def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), + 4, IIC_Br, [(ARMcall tglobaladdr:$func)], + (tBLXi pred:$p, t_blxtarget:$func)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, - "blx${p}\t$func", - [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]>, - T1Special<{1,1,1,?}> { - // A6.2.3 & A8.6.24 - bits<4> func; - let Inst{6-3} = func; - let Inst{2-0} = 0b000; - } + def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), + 2, IIC_Br, [(ARMtcall GPR:$func)], + (tBLXr pred:$p, GPR:$func)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // ARMv4T def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), @@ -487,8 +486,8 @@ let isCall = 1, let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in - def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]>, + def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, + "b", "\t$target", [(br bb:$target)]>, T1Encoding<{1,1,1,0,0,?}> { bits<11> target; let Inst{10-0} = target; @@ -498,8 +497,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // Just a pseudo for a tBL instruction. Needed to let regalloc know about // the clobber of LR. let Defs = [LR] in - def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target), - 4, IIC_Br, [], (tBL t_bltarget:$target)>; + def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p), + 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>; def tBR_JTr : tPseudoInst<(outs), (ins tGPR:$target, i32imm:$jt, i32imm:$id), @@ -522,31 +521,6 @@ let isBranch = 1, isTerminator = 1 in let Inst{7-0} = target; } -// Compare and branch on zero / non-zero -let isBranch = 1, isTerminator = 1 in { - def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, - "cbz\t$Rn, $target", []>, - T1Misc<{0,0,?,1,?,?,?}> { - // A8.6.27 - bits<6> target; - bits<3> Rn; - let Inst{9} = target{5}; - let Inst{7-3} = target{4-0}; - let Inst{2-0} = Rn; - } - - def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br, - "cbnz\t$cmp, $target", []>, - T1Misc<{1,0,?,1,?,?,?}> { - // A8.6.27 - bits<6> target; - bits<3> Rn; - let Inst{9} = target{5}; - let Inst{7-3} = target{4-0}; - let Inst{2-0} = Rn; - } -} - // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin versions. @@ -562,9 +536,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Non-Darwin versions (the difference is R9). 
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], Uses = [SP] in { - def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops), + def tTAILJMPdND : tPseudoExpand<(outs), + (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], - (tB t_brtarget:$dst)>, + (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotDarwin]>; def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], @@ -574,11 +549,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } -// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only +// A8.6.218 Supervisor Call (Software Interrupt) // A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1111 then SEE SVC let isCall = 1, Uses = [SP] in -def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br, +def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, "svc", "\t$imm", []>, Encoding16 { bits<8> imm; let Inst{15-12} = 0b1101; @@ -653,17 +628,17 @@ defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2, let AddedComplexity = 10 in def tLDRSB : // A8.6.80 - T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_1, IIC_iLoad_bh_r, - "ldrsb", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + "ldrsb", "\t$Rt, $addr", + [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>; let AddedComplexity = 10 in def tLDRSH : // A8.6.84 - T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_2, IIC_iLoad_bh_r, - "ldrsh", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + "ldrsh", "\t$Rt, $addr", + [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, @@ -678,7 +653,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", ".n\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, @@ -736,42 +711,53 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, // Load / store multiple Instructions. // -multiclass thumb_ldst_mult<string asm, InstrItinClass itin, - InstrItinClass itin_upd, bits<6> T1Enc, - bit L_bit> { - def IA : - T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>, - T1Encoding<T1Enc> { - bits<3> Rn; - bits<8> regs; - let Inst{10-8} = Rn; - let Inst{7-0} = regs; - } - def IA_UPD : - T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>, - T1Encoding<T1Enc> { - bits<3> Rn; - bits<8> regs; - let Inst{10-8} = Rn; - let Inst{7-0} = regs; - } -} - // These require base address to be written back or one of the loaded regs. 
let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, - {1,1,0,0,1,?}, 1>; - +def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; +} + +// Writeback version is just a pseudo, as there's no encoding difference. +// Writeback happens iff the base register is not in the destination register +// list. +def tLDMIA_UPD : + InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain, + "$Rn = $wb", IIC_iLoad_mu>, + PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> { + let Size = 2; + let OutOperandList = (outs GPR:$wb); + let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops); + let Pattern = []; + let isCodeGenOnly = 1; + let isPseudo = 1; + list<Predicate> Predicates = [IsThumb]; +} + +// There is no non-writeback version of STM for Thumb. let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, - {1,1,0,0,0,?}, 0>; +def tSTMIA_UPD : Thumb1I<(outs GPR:$wb), + (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), + AddrModeNone, 2, IIC_iStore_mu, + "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>, + T1Encoding<{1,1,0,0,0,?}> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; +} } // neverHasSideEffects +def : InstAlias<"ldm${p} $Rn!, $regs", + (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>, + Requires<[IsThumb, IsThumb1Only]>; + let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, @@ -876,7 +862,7 @@ def tADC : // A8.6.2 // Add immediate def tADDi3 : // A8.6.4 T1 - T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "add", "\t$Rd, $Rm, $imm3", [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { @@ -885,8 +871,8 @@ def tADDi3 : // A8.6.4 T1 } def tADDi8 : // A8.6.4 T2 - T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), - IIC_iALUi, + T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), + (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "add", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>; @@ -920,10 +906,10 @@ def tAND : // A8.6.12 // ASR immediate def tASRri : // A8.6.14 - T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "asr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -962,7 +948,7 @@ def tCMNz : // A8.6.33 // CMP immediate let isCompare = 1, Defs = [CPSR] in { -def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi, +def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi, "cmp", "\t$Rn, $imm8", [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, T1General<{1,0,1,?,?}> { @@ -1003,7 +989,7 @@ def tEOR : // A8.6.45 // LSL immediate def tLSLri : // A8.6.88 - T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5), IIC_iMOVsi, "lsl", "\t$Rd, $Rm, $imm5", [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> { @@ -1020,10 +1006,10 @@ def 
tLSLrr : // A8.6.89 // LSR immediate def tLSRri : // A8.6.90 - T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "lsr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -1047,6 +1033,10 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, let Inst{10-8} = Rd; let Inst{7-0} = imm8; } +// Because we have an explicit tMOVSr below, we need an alias to handle +// the immediate "movs" form here. Blech. +def : tInstAlias <"movs $Rdn, $imm", + (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>; // A7-73: MOV(2) - mov setting flag. @@ -1077,10 +1067,19 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, // Multiply register let isCommutable = 1 in def tMUL : // A8.6.105 T1 - T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), - IIC_iMUL32, - "mul", "\t$Rdn, $Rm, $Rdn", - [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>; + Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2, + IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd", + [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>, + T1DataProcessing<0b1101> { + bits<3> Rd; + bits<3> Rn; + let Inst{5-3} = Rn; + let Inst{2-0} = Rd; + let AsmMatchConverter = "cvtThumbMultiply"; +} + +def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn, + pred:$p)>; // Move inverse register def tMVN : // A8.6.107 @@ -1132,6 +1131,9 @@ def tRSB : // A8.6.141 "rsb", "\t$Rd, $Rn, #0", [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; +def : tInstAlias<"neg${s}${p} $Rd, $Rm", + (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + // Subtract with carry register let Uses = [CPSR] in def tSBC : // A8.6.151 @@ -1142,7 +1144,7 @@ def tSBC : // A8.6.151 // Subtract immediate def tSUBi3 : // A8.6.210 T1 - T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "sub", "\t$Rd, $Rm, $imm3", [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> { @@ -1151,8 +1153,8 @@ def tSUBi3 : // A8.6.210 T1 } def tSUBi8 : // A8.6.210 T2 - T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), - IIC_iALUi, + T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), + (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "sub", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>; @@ -1163,8 +1165,6 @@ def tSUBrr : // A8.6.212 "sub", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>; -// TODO: A7-96: STMIA - store multiple. - // Sign-extend byte def tSXTB : // A8.6.222 T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1216,12 +1216,13 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. // assembler. 
def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), - IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>, + IIC_iALUi, "adr{$p}\t$Rd, $addr", []>, T1Encoding<{1,0,1,0,0,?}> { bits<3> Rd; bits<8> addr; let Inst{10-8} = Rd; let Inst{7-0} = addr; + let DecoderMethod = "DecodeThumbAddSpecialReg"; } let neverHasSideEffects = 1, isReMaterializable = 1 in @@ -1361,6 +1362,31 @@ def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; +def : T1Pat<(atomic_load_8 t_addrmode_is1:$src), + (tLDRBi t_addrmode_is1:$src)>; +def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src), + (tLDRBr t_addrmode_rrs1:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_is2:$src), + (tLDRHi t_addrmode_is2:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src), + (tLDRHr t_addrmode_rrs2:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_is4:$src), + (tLDRi t_addrmode_is4:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src), + (tLDRr t_addrmode_rrs4:$src)>; +def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val), + (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>; +def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val), + (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val), + (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val), + (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val), + (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val), + (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>; + // Large immediate handling. // Two piece imms. @@ -1395,3 +1421,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 2, IIC_Br, [(brind GPR:$Rm)], (tMOVr PC, GPR:$Rm, pred:$p)>; } + + +// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00 +// encoding is available on ARMv6K, but we don't differentiate that finely. +def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; + + +// For round-trip assembly/disassembly, we have to handle a CPS instruction +// without any iflags. That's not, strictly speaking, valid syntax, but it's +// a useful extention and assembles to defined behaviour (the insn does +// nothing). +def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; +def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index c2c6cbc..471ec29 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -12,13 +12,32 @@ //===----------------------------------------------------------------------===// // IT block predicate field +def it_pred_asmoperand : AsmOperandClass { + let Name = "ITCondCode"; + let ParserMethod = "parseITCondCode"; +} def it_pred : Operand<i32> { let PrintMethod = "printMandatoryPredicateOperand"; + let ParserMatchClass = it_pred_asmoperand; } // IT block condition mask +def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; } def it_mask : Operand<i32> { let PrintMethod = "printThumbITMask"; + let ParserMatchClass = it_mask_asmoperand; +} + +// t2_shift_imm: An integer that encodes a shift amount and the type of shift +// (asr or lsl). The 6-bit immediate encodes as: +// {5} 0 ==> lsl +// 1 asr +// {4-0} imm5 shift amount. 
+// asr #32 not allowed +def t2_shift_imm : Operand<i32> { + let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterImmAsmOperand; + let DecoderMethod = "DecodeT2ShifterImmOperand"; } // Shifted operands. No register controlled shifts for Thumb2. @@ -28,6 +47,8 @@ def t2_so_reg : Operand<i32>, // reg imm [shl,srl,sra,rotr]> { let EncoderMethod = "getT2SORegOpValue"; let PrintMethod = "printT2SOOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; let MIOperandInfo = (ops rGPR, i32imm); } @@ -50,6 +71,7 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ }]> { let ParserMatchClass = t2_so_imm_asmoperand; let EncoderMethod = "getT2SOImmOpValue"; + let DecoderMethod = "DecodeT2SOImm"; } // t2_so_imm_not - Match an immediate that is a complement @@ -65,11 +87,6 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; -/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. -def imm1_31 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 32; -}]>; - /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ @@ -96,17 +113,20 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 +def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { let PrintMethod = "printAddrModeImm12Operand"; let EncoderMethod = "getAddrModeImm12OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm12"; + let ParserMatchClass = t2addrmode_imm12_asmoperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } // t2ldrlabel := imm12 def t2ldrlabel : Operand<i32> { let EncoderMethod = "getAddrModeImm12OpValue"; + let PrintMethod = "printT2LdrLabelOperand"; } @@ -116,13 +136,36 @@ def t2adrlabel : Operand<i32> { } +// t2addrmode_posimm8 := reg + imm8 +def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} +def t2addrmode_posimm8 : Operand<i32> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemPosImm8OffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_negimm8 := reg - imm8 +def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} +def t2addrmode_negimm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemNegImm8OffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + // t2addrmode_imm8 := reg +/- imm8 +def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemImm8OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } def t2am_imm8_offset : Operand<i32>, @@ -130,38 +173,61 @@ def t2am_imm8_offset : Operand<i32>, 
[], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let EncoderMethod = "getT2AddrModeImm8OffsetOpValue"; - let ParserMatchClass = MemMode5AsmOperand; + let DecoderMethod = "DecodeT2Imm8"; } // t2addrmode_imm8s4 := reg +/- (imm8 << 2) +def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} def t2addrmode_imm8s4 : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4Operand"; let EncoderMethod = "getT2AddrModeImm8s4OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8s4"; + let ParserMatchClass = MemImm8s4OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } +def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } def t2am_imm8s4_offset : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; + let EncoderMethod = "getT2Imm8s4OpValue"; + let DecoderMethod = "DecodeT2Imm8S4"; +} + +// t2addrmode_imm0_1020s4 := reg + (imm8 << 2) +def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { + let Name = "MemImm0_1020s4Offset"; +} +def t2addrmode_imm0_1020s4 : Operand<i32> { + let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; + let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm0_1020s4"; + let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand; + let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm); } // t2addrmode_so_reg := reg + (reg << imm2) +def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";} def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; let EncoderMethod = "getT2AddrModeSORegOpValue"; + let DecoderMethod = "DecodeT2AddrModeSOReg"; + let ParserMatchClass = t2addrmode_so_reg_asmoperand; let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } -// t2addrmode_reg := reg -// Used by load/store exclusive instructions. Useful to enable right assembly -// parsing and printing. Not used for any codegen matching. -// -def t2addrmode_reg : Operand<i32> { - let PrintMethod = "printAddrMode7Operand"; - let MIOperandInfo = (ops GPR); - let ParserMatchClass = MemMode7AsmOperand; +// Addresses for the TBB/TBH instructions. +def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; } +def addrmode_tbb : Operand<i32> { + let PrintMethod = "printAddrModeTBB"; + let ParserMatchClass = addrmode_tbb_asmoperand; + let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); +} +def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; } +def addrmode_tbh : Operand<i32> { + let PrintMethod = "printAddrModeTBH"; + let ParserMatchClass = addrmode_tbh_asmoperand; + let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); } //===----------------------------------------------------------------------===// @@ -419,47 +485,6 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, } -/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -/// unary operation that produces a value. These are predicable and can be -/// changed to modify CPSR. 
-multiclass T2I_un_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { - // shifted imm - def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, - opc, "\t$Rd, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { - let isAsCheapAsAMove = Cheap; - let isReMaterializable = ReMat; - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; - } - // register - def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, - opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type - } - // shifted register - def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, - opc, ".w\t$Rd, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - } -} - /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. @@ -500,21 +525,18 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, } // Assembly aliases for optional destination operand when it's the same // as the source operand. - def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; - def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; - def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; + cc_out:$s)>; } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need @@ -522,7 +544,27 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">; + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + // Assembler aliases w/o the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, + t2_so_reg:$shift, pred:$p, + cc_out:$s)>; + + // and with the optional destination operand, too. 
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_reg:$shift, pred:$p, + cc_out:$s)>; +} /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen @@ -563,45 +605,28 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { /// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the /// instruction modifies the CPSR register. -let isCodeGenOnly = 1, Defs = [CPSR] in { +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { multiclass T2I_bin_s_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2TwoRegImm< + def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii, - !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{15} = 0; - } + opc, ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>; // register - def rr : T2ThreeReg< + def rr : T2sThreeReg< (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir, - !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> { - let isCommutable = Commutable; - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type - } + opc, ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>; // shifted register - def rs : T2TwoRegShiftedReg< + def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, - !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - } + opc, ".w\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -614,9 +639,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, // in particular for taking the address of a local. 
let isReMaterializable = 1 in { def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> { + (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -626,9 +651,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } // 12-bit imm def ri12 : T2I< - (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, + (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -644,9 +669,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{7-0} = imm{7-0}; } // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> { + def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), + IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -658,9 +683,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } // shifted register def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), + (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -671,13 +696,13 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns /// for a binary operation that produces a value and use the carry /// bit. It's not predicable. 
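As background for the carry-using add/subtract pair described above (an illustration in plain C++, not code from the patch): adc/sbc exist so that multi-word arithmetic can chain through the carry flag, which is why these patterns both read and produce CPSR.

#include <cstdint>

// Roughly what an ADDS/ADC sequence computes for a 64-bit add split into
// 32-bit halves: the low add produces a carry, the high add consumes it.
static void add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi,
                  uint32_t &rlo, uint32_t &rhi) {
  rlo = alo + blo;
  uint32_t carry = (rlo < alo) ? 1u : 0u; // carry-out of the low word
  rhi = ahi + bhi + carry;                // carry-in for the high word
}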
-let Uses = [CPSR] in { +let Defs = [CPSR], Uses = [CPSR] in { multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, + [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -687,7 +712,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, // register def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -701,7 +726,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, + [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -710,64 +735,35 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, } } -// Carry setting variants -// NOTE: CPSR def omitted because it will be handled by the custom inserter. -let usesCustomInserter = 1 in { -multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { - // shifted imm - def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), - 4, IIC_iALUi, - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; - // register - def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - 4, IIC_iALUr, - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { - let isCommutable = Commutable; - } - // shifted register - def rs : t2PseudoInst< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - 4, IIC_iALUsi, - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; -} -} - /// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register /// version is not needed since this is only for codegen. -let isCodeGenOnly = 1, Defs = [CPSR] in { +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2TwoRegImm< + def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{15} = 0; - } + opc, ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>; // shifted register - def rs : T2TwoRegShiftedReg< + def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. 
- } + IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>; } } /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. -multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, + string baseOpc> { // 5-bit imm def ri : T2sTwoRegShiftImm< - (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi, + (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, opc, ".w\t$Rd, $Rm, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn @@ -784,20 +780,50 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { let Inst{15-12} = 0b1111; let Inst{7-4} = 0b0000; } + + // Optional destination register + def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + + // Assembler aliases w/o the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + + // and with the optional destination operand, too. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; } /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
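As a reminder of the semantics these compare/test patterns model (illustration only, not LLVM code): a compare is a subtract whose result is discarded, so only the NZCV flags in CPSR survive, which is why no explicit result operand is defined.

#include <cstdint>

struct NZCV { bool n, z, c, v; };

// CMP Rn, Op2 behaves like SUBS with the result thrown away.
static NZCV cmpFlags(uint32_t rn, uint32_t op2) {
  uint32_t res = rn - op2;
  NZCV f;
  f.n = (res >> 31) != 0;                          // negative
  f.z = (res == 0);                                // zero
  f.c = (rn >= op2);                               // carry = no borrow
  f.v = (((rn ^ op2) & (rn ^ res)) >> 31) != 0;    // signed overflow
  return f;
}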
-let isCompare = 1, Defs = [CPSR] in { multiclass T2I_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode> { + PatFrag opnode, string baseOpc> { +let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< - (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii, + (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii, opc, ".w\t$Rn, $imm", - [(opnode GPR:$Rn, t2_so_imm:$imm)]> { + [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -807,9 +833,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, } // register def rr : T2TwoRegCmp< - (outs), (ins GPR:$lhs, rGPR:$rhs), iir, - opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, rGPR:$rhs)]> { + (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir, + opc, ".w\t$Rn, $Rm", + [(opnode GPRnopc:$Rn, rGPR:$Rm)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -821,9 +847,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, } // shifted register def rs : T2OneRegCmpShiftedReg< - (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, + (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rn, $ShiftedRm", - [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> { + [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -831,55 +857,60 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, let Inst{11-8} = 0b1111; // Rd } } + + // Assembler aliases w/o the ".w" suffix. + // No alias here for 'rr' version as not all instantiations of this + // multiclass want one (CMP in particular, does not). + def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn, + t2_so_imm:$imm, pred:$p)>; + def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn, + t2_so_reg:$shift, + pred:$p)>; } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. multiclass T2I_ld<bit signed, bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { - def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii, + InstrItinClass iii, InstrItinClass iis, RegisterClass target, + PatFrag opnode> { + def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> { - let Inst{31-27} = 0b11111; - let Inst{26-25} = 0b00; + [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{31-25} = 0b1111100; let Inst{24} = signed; let Inst{23} = 1; let Inst{22-21} = opcod; let Inst{20} = 1; // load - - bits<4> Rt; - let Inst{15-12} = Rt; - - bits<17> addr; - let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn - let Inst{23} = addr{12}; // U + let Inst{15-12} = Rt; let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii, + def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> { + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = opcod; let Inst{20} = 1; // load + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; let Inst{11} = 1; // Offset: index==TRUE, wback==FALSE let Inst{10} = 1; // The P bit. 
- let Inst{8} = 0; // The W bit. - - bits<4> Rt; - let Inst{15-12} = Rt; - - bits<13> addr; - let Inst{19-16} = addr{12-9}; // Rn let Inst{9} = addr{8}; // U + let Inst{8} = 0; // The W bit. let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -895,12 +926,14 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{19-16} = addr{9-6}; // Rn let Inst{3-0} = addr{5-2}; // Rm let Inst{5-4} = addr{1-0}; // imm + + let DecoderMethod = "DecodeT2LoadShift"; } // FIXME: Is the pci variant actually needed? - def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii, + def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { + [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -918,10 +951,11 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. multiclass T2I_st<bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { - def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii, + InstrItinClass iii, InstrItinClass iis, RegisterClass target, + PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> { + [(opnode target:$Rt, t2addrmode_imm12:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0001; let Inst{22-21} = opcod; @@ -936,9 +970,9 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii, + def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> { + [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -956,9 +990,9 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{9} = addr{8}; // U let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> { + [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -977,146 +1011,81 @@ multiclass T2I_st<bits<2> opcod, string opc, /// T2I_ext_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
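The rewritten extend classes below fold the plain-register form into the rotated form by treating a rotation of 0 as "no rotate". The underlying operation, sketched here in C++ for illustration (this is not the backend code), is a rotate right by 0, 8, 16 or 24 bits followed by the extension:

#include <cstdint>

// sxtb Rd, Rm{, ror #rot}: rotate Rm right by 0/8/16/24, then sign-extend
// the low byte. rot == 0 is the un-rotated register form, which is why a
// single class with a rot_imm operand can cover both variants.
static int32_t sxtbRor(uint32_t rm, unsigned rot) {
  uint32_t rotated = rot ? ((rm >> rot) | (rm << (32u - rot))) : rm;
  return static_cast<int32_t>(static_cast<int8_t>(rotated & 0xffu));
}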
-multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, - opc, ".w\t$Rd, $Rm, ror $rot", - [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, + opc, ".w\t$Rd, $Rm$rot", + [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[IsThumb2]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. -multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, "\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot", - [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", + [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern // supported yet. 
-multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, "\t$Rd, $Rm", []>, +class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, + opc, "\t$Rd, $Rm$rot", []>, Requires<[IsThumb2, HasT2ExtractPack]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr, - opc, "\t$Rd, $Rm, ror $rot", []>, - Requires<[IsThumb2, HasT2ExtractPack]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } /// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, - opc, "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def rr_rot : T2ThreeReg<(outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [(set rGPR:$Rd, (opnode rGPR:$Rn, - (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> + : T2ThreeReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", + [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } -// DO variant - disassembly only, no pattern - -multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> { - def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, - opc, "\t$Rd, $Rn, $Rm", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_exta_rrot_np<bits<3> opcod, string opc> + : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> { + bits<2> rot; + let Inst{31-27} = 0b11111; + 
let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } //===----------------------------------------------------------------------===// @@ -1143,7 +1112,7 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin, // assembler. def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), (ins t2adrlabel:$addr, pred:$p), - IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> { + IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -1160,6 +1129,8 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let Inst{26} = addr{11}; let Inst{14-12} = addr{10-8}; let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2Adr"; } let neverHasSideEffects = 1, isReMaterializable = 1 in @@ -1177,33 +1148,33 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, UnOpFrag<(load node:$Src)>>; // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(zextloadi16 node:$Src)>>; + rGPR, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(zextloadi8 node:$Src)>>; + rGPR, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(sextloadi16 node:$Src)>>; + rGPR, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(sextloadi8 node:$Src)>>; + rGPR, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>; + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), @@ -1214,8 +1185,8 @@ def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), // earlier? 
def : T2Pat<(extloadi1 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi1 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), @@ -1223,8 +1194,8 @@ def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), def : T2Pat<(extloadi8 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi8 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), @@ -1232,8 +1203,8 @@ def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), def : T2Pat<(extloadi16 t2addrmode_imm12:$addr), (t2LDRHi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi16 t2addrmode_imm8:$addr), - (t2LDRHi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr), + (t2LDRHi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), (t2LDRHs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), @@ -1247,83 +1218,86 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), // Indexed loads let mayLoad = 1, neverHasSideEffects = 1 in { -def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn), +def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} -def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, - "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; +def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, + "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; -def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), +def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), 
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn", - []>; + "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; } // mayLoad = 1, neverHasSideEffects = 1 -// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are -// for disassembly only. +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, "\t$Rt, $addr", []> { + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = type; let Inst{20} = 1; // load + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt; let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. 
- - bits<4> Rt; - bits<13> addr; - let Inst{15-12} = Rt; - let Inst{19-16} = addr{12-9}; - let Inst{7-0} = addr{7-0}; + let Inst{7-0} = addr{7-0}; } def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; @@ -1333,67 +1307,97 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; // Store -defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, BinOpFrag<(store node:$LHS, node:$RHS)>>; defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si, - BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, - BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>; + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores -def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "str", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iStore_iu, - "str", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + "str", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} +def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "strh", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strh", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} -def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strb", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} -def 
t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iStore_iu, + "str", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_store rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strh", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, + "strb", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +// Pseudo-instructions for pattern matching the pre-indexed stores. We can't +// put the patterns on the instruction definitions directly as ISel wants +// the address base and offset to be separate operands, not a single +// complex operand like we represent the instructions themselves. The +// pseudos map between the two. +let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { +def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +} + // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly // only. @@ -1424,21 +1428,31 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. 
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), + (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { + let AsmMatchConverter = "cvtT2LdrdPre"; + let DecoderMethod = "DecodeT2LDRDPreInstruction"; +} -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; +def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), + (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), + IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", + "$addr.base = $wb", []>; -def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), - IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; +def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $wb", []> { + let AsmMatchConverter = "cvtT2StrdPre"; + let DecoderMethod = "DecodeT2STRDPreInstruction"; +} -def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), - IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; +def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, + t2am_imm8s4_offset:$imm), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", + "$addr.base = $wb", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. These are for disassembly only. @@ -1463,9 +1477,9 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{11-0} = addr{11-0}; // imm12 } - def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc, + def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1496,6 +1510,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{19-16} = addr{9-6}; // Rn let Inst{3-0} = addr{5-2}; // Rm let Inst{5-4} = addr{1-0}; // imm2 + + let DecoderMethod = "DecodeT2LoadShift"; } } @@ -1507,11 +1523,11 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; // Load / store multiple Instructions. 
// -multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, +multiclass thumb2_ld_mult<string asm, InstrItinClass itin, InstrItinClass itin_upd, bit L_bit> { def IA : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> { + itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { bits<4> Rn; bits<16> regs; @@ -1522,11 +1538,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 0; // No writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def IA_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { bits<4> Rn; bits<16> regs; @@ -1537,11 +1554,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def DB : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> { + itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { bits<4> Rn; bits<16> regs; @@ -1552,11 +1570,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 0; // No writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def DB_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> { + itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { bits<4> Rn; bits<16> regs; @@ -1567,17 +1586,95 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } } let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; +defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; + +multiclass thumb2_st_mult<string asm, InstrItinClass itin, + InstrItinClass itin_upd, bit L_bit> { + def IA : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def IA_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def DB : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let 
Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def DB_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } +} + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; +defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; } // neverHasSideEffects @@ -1587,7 +1684,7 @@ defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; // let neverHasSideEffects = 1 in -def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, +def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -1596,6 +1693,10 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } +def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, + pred:$p, CPSR)>; +def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, + pred:$p, CPSR)>; // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, @@ -1610,12 +1711,20 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, - pred:$p, cc_out:$s)>, - Requires<[IsThumb2]>; +// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'. +// Use aliases to get that to play nice here. 
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, CPSR)>; +def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, CPSR)>; + +def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, zero_reg)>; +def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, zero_reg)>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, +def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", [(set rGPR:$Rd, imm0_65535:$imm)]> { let Inst{31-27} = 0b11110; @@ -1632,6 +1741,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, let Inst{26} = imm{11}; let Inst{14-12} = imm{10-8}; let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2MOVTWInstruction"; } def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -1639,7 +1749,7 @@ def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), let Constraints = "$src = $Rd" in { def t2MOVTi16 : T2I<(outs rGPR:$Rd), - (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi, + (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi, "movt", "\t$Rd, $imm", [(set rGPR:$Rd, (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { @@ -1657,6 +1767,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), let Inst{26} = imm{11}; let Inst{14-12} = imm{10-8}; let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2MOVTWInstruction"; } def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -1671,28 +1782,26 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -defm t2SXTB : T2I_ext_rrot<0b100, "sxtb", +def t2SXTB : T2I_ext_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm t2SXTH : T2I_ext_rrot<0b000, "sxth", +def t2SXTH : T2I_ext_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; +def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; -defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab", +def t2SXTAB : T2I_exta_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah", +def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">; - -// TODO: SXT(A){B|H}16 - done for disassembly only +def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; // Zero extenders let AddedComplexity = 16 in { -defm t2UXTB : T2I_ext_rrot<0b101, "uxtb", +def t2UXTB : T2I_ext_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm t2UXTH : T2I_ext_rrot<0b001, "uxth", +def t2UXTH : T2I_ext_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", +def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. @@ -1700,17 +1809,17 @@ defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", // instead so we can include a check for masking back in the upper // eight bits of the source into the lower eight bits of the result. 
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), -// (t2UXTB16r_rot rGPR:$Src, 24)>, +// (t2UXTB16 rGPR:$Src, 3)>, // Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot rGPR:$Src, 8)>, + (t2UXTB16 rGPR:$Src, 1)>, Requires<[HasT2ExtractPack, IsThumb2]>; -defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab", +def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah", +def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">; +def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1723,27 +1832,37 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. +// +// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the +// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by +// AdjustInstrPostInstrSelection where we determine whether or not to +// set the "s" bit based on CPSR liveness. +// +// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen +// support for an optional CPSR definition that corresponds to the DAG +// node's second value. We can then eliminate the implicit def of CPSR. defm t2ADDS : T2I_bin_s_irs <0b1000, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +let hasPostISelHook = 1 in { defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, - node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, - node:$RHS)>>; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; +} // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// FIXME: Eliminate them if we can write def : Pat patterns which defines +// CPSR and the implicit def of CPSR is not needed. defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -1760,23 +1879,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; let AddedComplexity = 1 in -def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm), +def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), +def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
let AddedComplexity = 1 in -def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), (t2SBCri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; -let AddedComplexity = 1 in -def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), - (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), - (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -1893,8 +2007,7 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, let Inst{7-4} = op7_4; } -// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only - +// Unsigned Sum of Absolute Differences [and Accumulate]. def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, @@ -1906,8 +2019,7 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsThumb2, HasThumb2DSP]>; -// Signed/Unsigned saturate -- for disassembly only - +// Signed/Unsigned saturate. class T2SatI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { @@ -1918,26 +2030,26 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rd; let Inst{19-16} = Rn; - let Inst{4-0} = sat_imm{4-0}; - let Inst{21} = sh{6}; + let Inst{4-0} = sat_imm; + let Inst{21} = sh{5}; let Inst{14-12} = sh{4-2}; let Inst{7-6} = sh{1-0}; } def t2SSAT: T2SatI< - (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), - NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", - [/* For disassembly only; pattern left blank */]> { + (outs rGPR:$Rd), + (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; let Inst{15} = 0; + let Inst{5} = 0; } def t2SSAT16: T2SatI< - (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary, - "ssat16", "\t$Rd, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]>, + (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary, + "ssat16", "\t$Rd, $sat_imm, $Rn", []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; @@ -1946,30 +2058,30 @@ def t2SSAT16: T2SatI< let Inst{21} = 1; // sh = '1' let Inst{14-12} = 0b000; // imm3 = '000' let Inst{7-6} = 0b00; // imm2 = '00' + let Inst{5-4} = 0b00; } def t2USAT: T2SatI< - (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), - NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", - [/* For disassembly only; pattern left blank */]> { + (outs rGPR:$Rd), + (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; let Inst{15} = 0; } -def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), +def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), NoItinerary, - "usat16", "\t$dst, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]>, + "usat16", "\t$Rd, $sat_imm, $Rn", []>, Requires<[IsThumb2, HasThumb2DSP]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; + let Inst{31-22} = 0b1111001110; let Inst{20} = 0; let Inst{15} = 0; 
let Inst{21} = 1; // sh = '1' let Inst{14-12} = 0b000; // imm3 = '000' let Inst{7-6} = 0b00; // imm2 = '00' + let Inst{5-4} = 0b00; } def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; @@ -1979,10 +2091,14 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // Shift and rotate Instructions. // -defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>; -defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; -defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; -defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, + BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; +defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, + BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; +defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, + BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; +defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, + BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2090,7 +2206,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), } def t2SBFX: T2TwoRegBitFI< - (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -2099,7 +2215,7 @@ def t2SBFX: T2TwoRegBitFI< } def t2UBFX: T2TwoRegBitFI< - (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -2125,26 +2241,6 @@ let Constraints = "$src = $Rd" in { let msb{4-0} = imm{9-5}; let lsb{4-0} = imm{4-0}; } - - // GNU as only supports this form of bfi (w/ 4 arguments) - let isAsmParserOnly = 1 in - def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd), - (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit, - width_imm:$width), - IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", - []> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; // should be 0. - let Inst{25} = 1; - let Inst{24-20} = 0b10110; - let Inst{15} = 0; - let Inst{5} = 0; // should be 0. - - bits<5> lsbit; - bits<5> width; - let msb{4-0} = width; // Custom encoder => lsb+width-1 - let lsb{4-0} = lsbit; - } } defm t2ORN : T2I_bin_irs<0b0011, "orn", @@ -2152,13 +2248,53 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>, "t2ORN", 0, "">; +/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a +/// unary operation that produces a value. These are predicable and can be +/// changed to modify CPSR. 
+multiclass T2I_un_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { + // shifted imm + def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, + opc, "\t$Rd, $imm", + [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { + let isAsCheapAsAMove = Cheap; + let isReMaterializable = ReMat; + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; + } + // register + def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, + opc, ".w\t$Rd, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, + opc, ".w\t$Rd, $ShiftedRm", + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + } +} + // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, UnOpFrag<(not node:$Src)>, 1, 1>; - let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; @@ -2209,9 +2345,9 @@ def t2MLS: T2FourReg< let neverHasSideEffects = 1 in { let isCommutable = 1 in { def t2SMULL : T2MulLong<0b000, 0b0000, - (outs rGPR:$Rd, rGPR:$Ra), + (outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - "smull", "\t$Rd, $Ra, $Rn, $Rm", []>; + "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>; def t2UMULL : T2MulLong<0b010, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -2468,7 +2604,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only +// Halfword multiple accumulate long: SMLAL<x><y> def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, @@ -2487,8 +2623,6 @@ def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), Requires<[IsThumb2, HasThumb2DSP]>; // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -// These are for disassembly only. 
- def t2SMUAD: T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, @@ -2513,7 +2647,7 @@ def t2SMUSDX:T2ThreeReg_mac< Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2ThreeReg_mac< +def t2SMLAD : T2FourReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", "\t$Rd, $Rn, $Rm, $Ra", []>, @@ -2532,20 +2666,20 @@ def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", - "\t$Ra, $Rd, $Rm, $Rn", []>, + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; //===----------------------------------------------------------------------===// @@ -2613,10 +2747,10 @@ def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), (t2REVSH rGPR:$Rm)>; def t2PKHBT : T2ThreeReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), - (and (shl rGPR:$Rm, lsl_amt:$sh), + (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; @@ -2625,9 +2759,9 @@ def t2PKHBT : T2ThreeReg< let Inst{5} = 0; // BT form let Inst{4} = 0; - bits<8> sh; - let Inst{14-12} = sh{7-5}; - let Inst{7-6} = sh{4-3}; + bits<5> sh; + let Inst{14-12} = sh{4-2}; + let Inst{7-6} = sh{1-0}; } // Alternate cases for PKHBT where identities eliminate some nodes. @@ -2635,16 +2769,16 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), - (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, + (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. 
def t2PKHTB : T2ThreeReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh), IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), - (and (sra rGPR:$Rm, asr_amt:$sh), + (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; @@ -2653,19 +2787,19 @@ def t2PKHTB : T2ThreeReg< let Inst{5} = 1; // TB form let Inst{4} = 0; - bits<8> sh; - let Inst{14-12} = sh{7-5}; - let Inst{7-6} = sh{4-3}; + bits<5> sh; + let Inst{14-12} = sh{4-2}; + let Inst{7-6} = sh{1-0}; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), - (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, + (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), - (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, + (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; //===----------------------------------------------------------------------===// @@ -2673,14 +2807,14 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; -def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm), - (t2CMPri GPR:$lhs, t2_so_imm:$imm)>; -def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs), - (t2CMPrr GPR:$lhs, rGPR:$rhs)>; -def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs), - (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), + (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs), + (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs), + (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>; //FIXME: Disable CMN, as CCodes are backwards from compare expectations // Compare-to-zero still works out, just not the relationals @@ -2688,20 +2822,23 @@ def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs), // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>, + "t2CMNz">; //def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), // (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), - (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), + (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>; defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, + "t2TST">; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, + "t2TEQ">; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use @@ -2723,7 +2860,7 @@ def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), // FIXME: Pseudo-ize these. For now, just mark codegen only. 
let isCodeGenOnly = 1 in { let isMoveImm = 1 in -def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm), +def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm), IIC_iCMOVi, "movw", "\t$Rd, $imm", []>, RegConstraint<"$false = $Rd"> { @@ -2807,20 +2944,19 @@ def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, } def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dsb", "\t$opt", - [/* For disassembly only; pattern left blank */]>, + "dsb", "\t$opt", []>, Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } -// ISB has only full system option -- for disassembly only -def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasV7]> { +def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, + "isb", "\t$opt", + []>, Requires<[IsThumb2, HasDB]> { + bits<4> opt; let Inst{31-4} = 0xf3bf8f6; - let Inst{3-0} = 0b1111; + let Inst{3-0} = opt; } class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, @@ -2858,28 +2994,27 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", []> { + bits<4> Rt; + bits<12> addr; let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; - let Inst{11-8} = 0b1111; - let Inst{7-0} = 0b00000000; // imm8 = 0 - - bits<4> Rt; - bits<4> addr; - let Inst{19-16} = addr; + let Inst{19-16} = addr{11-8}; let Inst{15-12} = Rt; + let Inst{11-8} = 0b1111; + let Inst{7-0} = addr{7-0}; } let hasExtraDefRegAllocReq = 1 in def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), - (ins t2addrmode_reg:$addr), + (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { @@ -2890,33 +3025,33 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), - (ins rGPR:$Rt, t2addrmode_reg:$addr), + (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", []>; def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), - (ins rGPR:$Rt, t2addrmode_reg:$addr), + (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, + t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", []> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000100; - let Inst{7-0} = 0b00000000; // imm8 = 0 - bits<4> Rd; - bits<4> addr; bits<4> Rt; - let Inst{11-8} = Rd; - let Inst{19-16} = addr; + bits<12> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000100; + let Inst{19-16} = addr{11-8}; 
let Inst{15-12} = Rt; + let Inst{11-8} = Rd; + let Inst{7-0} = addr{7-0}; } } let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), - (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { @@ -2924,9 +3059,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), let Inst{11-8} = Rt2; } -// Clear-Exclusive is for disassembly only. -def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", - [/* For disassembly only; pattern left blank */]>, +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; @@ -2986,8 +3119,8 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in -def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br, - "b.w\t$target", +def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, + "b", ".w\t$target", [(br bb:$target)]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; @@ -3009,15 +3142,13 @@ def t2BR_JT : t2PseudoInst<(outs), // FIXME: Add a non-pc based case that can be predicated. def t2TBB_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), - 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; def t2TBH_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), - 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; -def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, - "tbb", "\t[$Rn, $Rm]", []> { +def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, + "tbb", "\t$addr", []> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3025,10 +3156,12 @@ def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, let Inst{15-5} = 0b11110000000; let Inst{4} = 0; // B form let Inst{3-0} = Rm; + + let DecoderMethod = "DecodeThumbTableBranch"; } -def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, - "tbh", "\t[$Rn, $Rm, lsl #1]", []> { +def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, + "tbh", "\t$addr", []> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3036,13 +3169,15 @@ def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, let Inst{15-5} = 0b11110000000; let Inst{4} = 1; // H form let Inst{3-0} = Rm; + + let DecoderMethod = "DecodeThumbTableBranch"; } } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier // FIXME: should be able to write a pattern for ARMBrcond, but can't use -// a two-value operand where a dag node expects two operands. :( +// a two-value operand where a dag node expects ", "two operands. :( let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, "b", ".w\t$target", @@ -3060,6 +3195,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{13} = target{18}; let Inst{21-16} = target{17-12}; let Inst{10-0} = target{11-1}; + + let DecoderMethod = "DecodeThumb2BCCInstruction"; } // Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so @@ -3068,9 +3205,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin version. 
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], Uses = [SP] in - def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops), + def tTAILJMPd: tPseudoExpand<(outs), + (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], - (t2B uncondbrtarget:$dst)>, + (t2B uncondbrtarget:$dst, pred:$p)>, Requires<[IsThumb2, IsDarwin]>; } @@ -3087,30 +3225,55 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), bits<4> mask; let Inst{7-4} = cc; let Inst{3-0} = mask; + + let DecoderMethod = "DecodeIT"; } // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", - [/* For disassembly only; pattern left blank */]> { +def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> { + bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; let Inst{25-20} = 0b111100; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - - bits<4> func; let Inst{19-16} = func; + let Inst{15-0} = 0b1000111100000000; +} + +// Compare and branch on zero / non-zero +let isBranch = 1, isTerminator = 1 in { + def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + "cbz\t$Rn, $target", []>, + T1Misc<{0,0,?,1,?,?,?}>, + Requires<[IsThumb2]> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } + + def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + "cbnz\t$Rn, $target", []>, + T1Misc<{1,0,?,1,?,?,?}>, + Requires<[IsThumb2]> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } } -// Change Processor State is a system instruction -- for disassembly and -// parsing only. + +// Change Processor State is a system instruction. // FIXME: Since the asm parser has currently no clean way to handle optional // operands, create 3 versions of the same instruction. Once there's a clean // framework to represent optional operands, change this behavior. class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, - !strconcat("cps", asm_op), - [/* For disassembly only; pattern left blank */]> { + !strconcat("cps", asm_op), []> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -3126,6 +3289,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, let Inst{8} = M; let Inst{7-5} = iflags; let Inst{4-0} = mode; + let DecoderMethod = "DecodeT2CPSInstruction"; } let M = 1 in @@ -3135,14 +3299,12 @@ let mode = 0, M = 0 in def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod.w\t$iflags">; let imod = 0, iflags = 0, M = 1 in - def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">; + def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -// Helper class for disassembly only. 
class T2I_hint<bits<8> op7_0, string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]> { + : T2I<(outs), (ins), NoItinerary, opc, asm, []> { let Inst{31-20} = 0xf3a; let Inst{19-16} = 0b1111; let Inst{15-14} = 0b10; @@ -3158,20 +3320,17 @@ def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; def t2SEV : T2I_hint<0b00000100, "sev", ".w">; def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { - let Inst{31-20} = 0xf3a; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - let Inst{10-8} = 0b000; - let Inst{7-4} = 0b1111; - bits<4> opt; + let Inst{31-20} = 0b111100111010; + let Inst{19-16} = 0b1111; + let Inst{15-8} = 0b10000000; + let Inst{7-4} = 0b1111; let Inst{3-0} = opt; } -// Secure Monitor Call is a system instruction -- for disassembly only +// Secure Monitor Call is a system instruction. // Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", - [/* For disassembly only; pattern left blank */]> { +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; @@ -3180,32 +3339,30 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", let Inst{19-16} = opt; } -class T2SRS<bits<12> op31_20, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { - let Inst{31-20} = op31_20{11-0}; - bits<5> mode; + let Inst{31-25} = 0b1110100; + let Inst{24-23} = Op; + let Inst{22} = 0; + let Inst{21} = W; + let Inst{20-16} = 0b01101; + let Inst{15-5} = 0b11000000000; let Inst{4-0} = mode{4-0}; } -// Store Return State is a system instruction -- for disassembly only -def t2SRSDBW : T2SRS<0b111010000010, - (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSDB : T2SRS<0b111010000000, - (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSIAW : T2SRS<0b111010011010, - (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSIA : T2SRS<0b111010011000, - (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", - [/* For disassembly only; pattern left blank */]>; - -// Return From Exception is a system instruction -- for disassembly only +// Store Return State is a system instruction. +def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary, + "srsdb", "\tsp!, $mode", []>; +def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary, + "srsdb","\tsp, $mode", []>; +def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary, + "srsia","\tsp!, $mode", []>; +def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary, + "srsia","\tsp, $mode", []>; +// Return From Exception is a system instruction. 
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { @@ -3277,53 +3434,186 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), imm:$cp))]>, Requires<[IsThumb2]>; +// Pseudo isntruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), + NoItinerary, []>, Requires<[IsThumb2]>; +} + +//===----------------------------------------------------------------------===// +// Coprocessor load/store -- for disassembly only +// +class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm> + : T2I<oops, iops, NoItinerary, opc, asm, []> { + let Inst{31-28} = op31_28; + let Inst{27-25} = 0b110; +} + +multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { + def _OFFSET : T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _PRE : T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _POST: T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset"> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _OPTION : T2CI<op31_28, (outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; + } +} + +defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; +defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; +defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; +defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; + + //===----------------------------------------------------------------------===// // Move between special register and ARM core register -- for disassembly only // +// Move to ARM core register from Special Register -class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> 
op12, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { - let Inst{31-20} = op31_20{11-0}; - let Inst{15-14} = op15_14{1-0}; - let Inst{12} = op12{0}; +// A/R class MRS. +// +// A/R class can only move from CPSR or SPSR. +def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>, + Requires<[IsThumb2,IsARClass]> { + bits<4> Rd; + let Inst{31-12} = 0b11110011111011111000; + let Inst{11-8} = Rd; + let Inst{7-0} = 0b0000; } -class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> { +def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; + +def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>, + Requires<[IsThumb2,IsARClass]> { bits<4> Rd; + let Inst{31-12} = 0b11110011111111111000; + let Inst{11-8} = Rd; + let Inst{7-0} = 0b0000; +} + +// M class MRS. +// +// This MRS has a mask field in bits 7-0 and can take more values than +// the A/R class (a full msr_mask). +def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, + "mrs", "\t$Rd, $mask", []>, + Requires<[IsThumb2,IsMClass]> { + bits<4> Rd; + bits<8> mask; + let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; let Inst{19-16} = 0b1111; + let Inst{7-0} = mask; } -def t2MRS : T2MRS<0b111100111110, 0b10, 0, - (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", - [/* For disassembly only; pattern left blank */]>; -def t2MRSsys : T2MRS<0b111100111111, 0b10, 0, - (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", - [/* For disassembly only; pattern left blank */]>; // Move from ARM core register to Special Register // +// A/R class MSR. +// // No need to have both system and application versions, the encodings are the // same and the assembly parser has no way to distinguish between them. The mask // operand contains the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. -def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, - 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn), - NoItinerary, "msr", "\t$mask, $Rn", - [/* For disassembly only; pattern left blank */]> { +def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), + NoItinerary, "msr", "\t$mask, $Rn", []>, + Requires<[IsThumb2,IsARClass]> { bits<5> mask; bits<4> Rn; - let Inst{19-16} = Rn; + let Inst{31-21} = 0b11110011100; let Inst{20} = mask{4}; // R Bit - let Inst{13} = 0b0; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; let Inst{11-8} = mask{3-0}; + let Inst{7-0} = 0; } +// M class MSR. 
+// +// Move from ARM core register to Special Register +def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), + NoItinerary, "msr", "\t$SYSm, $Rn", []>, + Requires<[IsThumb2,IsMClass]> { + bits<8> SYSm; + bits<4> Rn; + let Inst{31-21} = 0b11110011100; + let Inst{20} = 0b0; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; + let Inst{7-0} = SYSm; +} + + //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register // @@ -3389,13 +3679,12 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, - (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - []>; + (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), []>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, - (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), []>; + (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), []>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3465,3 +3754,269 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{19-16} = CRn; let Inst{23-20} = opc1; } + + + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// SXT/UXT with no rotate +let AddedComplexity = 16 in { +def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +} + +def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + +// Atomic load/store patterns +def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr), + (t2LDRHi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr), + (t2LDRHi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr), + (t2LDRHs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr), + (t2LDRi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr), + (t2LDRi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr), + (t2LDRs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val), + (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_8 
t2addrmode_negimm8:$addr, GPR:$val), + (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val), + (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val), + (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val), + (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val), + (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>; + + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +// Aliases for ADC without the ".w" optional width specifier. +def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm", + (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm", + (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for SBC without the ".w" optional width specifier. +def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm", + (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", + (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for ADD without the ".w" optional width specifier. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", + (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", + (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for SUB without the ".w" optional width specifier. +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", + (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", + (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Alias for compares without the ".w" optional width specifier. +def : t2InstAlias<"cmn${p} $Rn, $Rm", + (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"teq${p} $Rn, $Rm", + (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"tst${p} $Rn, $Rm", + (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; + +// Memory barriers +def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>; + +// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional +// width specifier. 
+def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; + +// Alias for MVN without the ".w" optional width specifier. +def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", + (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", + (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; + +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the +// shift amount is zero (i.e., unspecified). +def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", + (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", + (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + +// PUSH/POP aliases for STM/LDM +def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; + +// Alias for REV/REV16/REVSH without the ".w" optional width specifier. +def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>; + + +// Alias for RSB without the ".w" optional width specifier, and with optional +// implied destination register. +def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm", + (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $imm", + (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm", + (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm", + (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, + cc_out:$s)>; + +// SSAT/USAT optional shift operand. +def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn", + (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>; +def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn", + (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>; + +// STM w/o the .w suffix. +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; + +// Alias for STR, STRB, and STRH without the ".w" optional +// width specifier. 
+def : t2InstAlias<"str${p} $Rt, $addr", + (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"strb${p} $Rt, $addr", + (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"strh${p} $Rt, $addr", + (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"str${p} $Rt, $addr", + (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"strb${p} $Rt, $addr", + (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"strh${p} $Rt, $addr", + (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; + +// Extend instruction optional rotate operand. +def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"sxtb${p} $Rd, $Rm", + (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtb16${p} $Rd, $Rm", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxth${p} $Rd, $Rm", + (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtb${p}.w $Rd, $Rm", + (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxth${p}.w $Rd, $Rm", + (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtb${p} $Rd, $Rm", + (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtb16${p} $Rd, $Rm", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxth${p} $Rd, $Rm", + (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"uxtb${p}.w $Rd, $Rm", + (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxth${p}.w $Rd, $Rm", + (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +// Extend instruction w/o the ".w" optional width specifier. +def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", + (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", + (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; + +def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", + (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", + (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index f1f3cb9..e746cf2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -31,18 +31,34 @@ def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; // Operand Definitions. // +// 8-bit floating-point immediate encodings. 
+def FPImmOperand : AsmOperandClass { + let Name = "FPImm"; + let ParserMethod = "parseFPImm"; +} + def vfp_f32imm : Operand<f32>, PatLeaf<(f32 fpimm), [{ - return ARM::getVFPf32Imm(N->getValueAPF()) != -1; - }]> { - let PrintMethod = "printVFPf32ImmOperand"; + return ARM_AM::getFP32Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP32Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; } def vfp_f64imm : Operand<f64>, PatLeaf<(f64 fpimm), [{ - return ARM::getVFPf64Imm(N->getValueAPF()) != -1; - }]> { - let PrintMethod = "printVFPf64ImmOperand"; + return ARM_AM::getFP64Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP64Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; } @@ -385,26 +401,26 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. -def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", +def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", +def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", +def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", +def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, @@ -511,14 +527,25 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, } def VMOVRRS : AVConv3I<0b11000101, 0b1010, - (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2), + IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { + bits<5> src1; + bits<4> Rt; + bits<4> Rt2; + + // Encode instruction operands. + let Inst{3-0} = src1{3-0}; + let Inst{5} = src1{4}; + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{7-6} = 0b00; // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
let D = VFPNeonDomain; + let DecoderMethod = "DecodeVMOVRRS"; } } // neverHasSideEffects @@ -552,11 +579,24 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { + // Instruction operands. + bits<5> dst1; + bits<4> src1; + bits<4> src2; + + // Encode instruction operands. + let Inst{3-0} = dst1{3-0}; + let Inst{5} = dst1{4}; + let Inst{15-12} = src1; + let Inst{19-16} = src2; + let Inst{7-6} = 0b00; // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + let DecoderMethod = "DecodeVMOVSRR"; } // FMRDH: SPR -> GPR @@ -1084,45 +1124,42 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$Dd, $imm", [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { - // Instruction operands. - bits<5> Dd; - bits<32> imm; - - // Encode instruction operands. - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - let Inst{19} = imm{31}; - let Inst{18-16} = imm{22-20}; - let Inst{3-0} = imm{19-16}; + bits<5> Dd; + bits<8> imm; - // Encode remaining instruction bits. let Inst{27-23} = 0b11101; + let Inst{22} = Dd{4}; let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Dd{3-0}; let Inst{11-9} = 0b101; let Inst{8} = 1; // Double precision. let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; } def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$Sd, $imm", [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { - // Instruction operands. - bits<5> Sd; - bits<32> imm; - - // Encode instruction operands. - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - let Inst{19} = imm{31}; // The immediate is handled as a double. - let Inst{18-16} = imm{22-20}; - let Inst{3-0} = imm{19-16}; + bits<5> Sd; + bits<8> imm; - // Encode remaining instruction bits. let Inst{27-23} = 0b11101; + let Inst{22} = Sd{0}; let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Sd{4-1}; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision. let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; } } + +//===----------------------------------------------------------------------===// +// Assembler aliases. 
+// + +def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; + diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c6efea1..faa8ba7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -763,9 +764,9 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode) { switch (Opc) { case ARM::LDRi12: - return ARM::LDR_PRE; + return ARM::LDR_PRE_IMM; case ARM::STRi12: - return ARM::STR_PRE; + return ARM::STR_PRE_IMM; case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; case ARM::VLDRD: @@ -789,9 +790,9 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode) { switch (Opc) { case ARM::LDRi12: - return ARM::LDR_POST; + return ARM::LDR_POST_IMM; case ARM::STRi12: - return ARM::STR_POST; + return ARM::STR_POST_IMM; case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; case ARM::VLDRD: @@ -892,12 +893,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - unsigned Offset = 0; - if (isAM2) - Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - else if (!isAM5) - Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; - if (isAM5) { // VLDM[SD}_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the @@ -911,28 +906,44 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, .addReg(MO.getReg(), (isLd ? getDefRegState(true) : getKillRegState(MO.isKill()))); } else if (isLd) { - if (isAM2) - // LDR_PRE, LDR_POST, - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); - else + if (isAM2) { + // LDR_PRE, LDR_POST + if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } else { + int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + } + } else { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2LDR_PRE, t2LDR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } } else { MachineOperand &MO = MI->getOperand(0); - if (isAM2) + // FIXME: post-indexed stores use am2offset_imm, which still encodes + // the vestigal zero-reg offset register. 
When that's fixed, this clause + // can be removed entirely. + if (isAM2 && NewOpc == ARM::STR_POST_IMM) { + int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); - else + } else { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } } MBB.erase(MBBI); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 7411b59..daa126d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -14,7 +14,7 @@ #include "ARM.h" #include "ARMAsmPrinter.h" -#include "ARMMCExpr.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/Constants.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/MC/MCExpr.h" diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index 76eb496..036822d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -182,8 +182,10 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; -def FPSCR : ARMReg<1, "fpscr">; -def ITSTATE : ARMReg<2, "itstate">; +def APSR : ARMReg<1, "apsr">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. def FPSID : ARMReg<0, "fpsid">; @@ -213,6 +215,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), }]; } +// GPRs without the PC. Some ARM instructions do not allow the PC in +// certain operand slots, particularly as the destination. Primarily +// useful for disassembly. +def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + +// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the +// implied SP argument list. +// FIXME: It would be better to not use this at all and refactor the +// instructions to not have SP an an explicit argument. That makes +// frame index resolution a bit trickier, though. +def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>; + // restricted GPR register class. Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC // or SP (R13 or R15) are used. The ARM ISA refers to these operands @@ -328,5 +347,6 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { + let CopyCost = -1; // Don't allow copying of status registers. 
let isAllocatable = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index ef0aaf2..a3a3d58 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -138,13 +138,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // Adjust parameters for memset, EABI uses format (ptr, size, value), // GNU library uses (ptr, value, size) // See RTABI section 4.3.4 -SDValue -ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const -{ +SDValue ARMSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { // Use default for non AAPCS subtargets if (!Subtarget->isAAPCS_ABI()) return SDValue(); @@ -155,7 +154,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index ec1bf5c..6419a73 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -14,10 +14,27 @@ #ifndef ARMSELECTIONDAGINFO_H #define ARMSELECTIONDAGINFO_H +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { +namespace ARM_AM { + static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { + switch (Opcode) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. + //case ARMISD::RRX: return ARM_AM::rrx; + } + } +} // end namespace ARM_AM + class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. 
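The ARM_AM::getShiftOpcForNode helper added to ARMSelectionDAGInfo.h above only translates generic ISD shift opcodes into the ARM_AM shift encodings; a caller still has to treat ARM_AM::no_shift as "not foldable", which is how ROTL and RRX end up rejected. A minimal sketch of such a caller follows; getFoldableShift and the constant-shift-amount check are illustrative assumptions for this note, not part of the patch (the real folding decisions live in ARMISelDAGToDAG.cpp).

#include "ARMSelectionDAGInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch: return the ARM_AM shift opcode to use when folding a shift node
// into a register-shifted operand, or ARM_AM::no_shift if the node is not
// a foldable shift.
static ARM_AM::ShiftOpc getFoldableShift(const SDNode *N) {
  ARM_AM::ShiftOpc ShOpc = ARM_AM::getShiftOpcForNode(N->getOpcode());
  if (ShOpc == ARM_AM::no_shift)
    return ShOpc;
  // Simplification for this sketch: only fold shifts by a constant amount,
  // since a variable amount cannot be encoded as an immediate shift.
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return ARM_AM::no_shift;
  return ShOpc;
}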
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 1cab9e4..247d6be 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -53,11 +53,14 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVMLxForwarding(false) , SlowFPBrcc(false) , InThumbMode(false) + , InNaClMode(false) , HasThumb2(false) + , IsMClass(false) , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , UseMovt(false) + , SupportsTailCall(false) , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) @@ -111,6 +114,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); + const Triple &T = getTargetTriple(); + SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index c650872..b63e108 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -70,9 +70,16 @@ protected: /// InThumbMode - True if compiling for Thumb, false for ARM. bool InThumbMode; + /// InNaClMode - True if targeting Native Client + bool InNaClMode; + /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; + /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - + /// v6m, v7m for example. + bool IsMClass; + /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; @@ -86,6 +93,11 @@ protected: /// imms (including global addresses). bool UseMovt; + /// SupportsTailCall - True if the OS supports tail call. The dynamic linker + /// must be able to synthesize call stubs for interworking between ARM and + /// Thumb. 
+ bool SupportsTailCall; + /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF /// only so far) bool HasFP16; @@ -209,6 +221,9 @@ protected: const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } @@ -218,10 +233,13 @@ protected: bool isThumb1Only() const { return InThumbMode && !HasThumb2; } bool isThumb2() const { return InThumbMode && HasThumb2; } bool hasThumb2() const { return HasThumb2; } + bool isMClass() const { return IsMClass; } + bool isARClass() const { return !IsMClass; } bool isR9Reserved() const { return IsR9Reserved; } bool useMovt() const { return UseMovt && hasV6T2Ops(); } + bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return AllowsUnalignedMem; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index f0b176a..96b1e89 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -15,77 +15,50 @@ #include "ARM.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; -// This is duplicated code. Refactor this. -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - - if (TheTriple.isOSWindows()) { - llvm_unreachable("ARM does not support Windows COFF format"); - return NULL; - } - - return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} +static cl::opt<bool> +EnableGlobalMerge("global-merge", cl::Hidden, + cl::desc("Enable global merge pass"), + cl::init(true)); extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); - - // Register the MC Code Emitter - TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend); - TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer); - TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer); - } /// TargetMachine ctor - Create an ARM architecture model. 
/// -ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { - DefRelocModel = getRelocationModel(); - // Default to soft float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Soft; } -ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget), +ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" - "v128:32:128-v64:32:64-n32") : + "v128:32:128-v64:32:64-n32-S32") : + Subtarget.isAAPCS_ABI() ? + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "v128:64:128-v64:64:64-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32")), + "v128:64:128-v64:64:64-n32-S32")), ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), @@ -95,20 +68,24 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, "support ARM mode execution!"); } -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : ARMBaseTargetMachine(T, TT, CPU, FS), +ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "i16:16:32-i8:8:32-i1:8:32-" - "v128:32:128-v64:32:64-a:0:32-n32") : + "v128:32:128-v64:32:64-a:0:32-n32-S32") : + Subtarget.isAAPCS_ABI() ? 
+ std::string("e-p:32:32-f64:64:64-i64:64:64-" + "i16:16:32-i8:8:32-i1:8:32-" + "v128:64:128-v64:64:64-a:0:32-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32")), + "v128:64:128-v64:64:64-a:0:32-n32-S32")), ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), @@ -117,10 +94,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -// Pass Pipeline Configuration bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None && EnableGlobalMerge) PM.add(createARMGlobalMergePass(getTargetLowering())); return false; @@ -139,7 +115,6 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, PM.add(createARMLoadStoreOptimizationPass(true)); if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); - return true; } @@ -150,7 +125,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, if (!Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); + PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow @@ -179,10 +154,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index bc3d46a..c8c601c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -37,11 +37,11 @@ protected: private: ARMJITInfo JITInfo; InstrItineraryData InstrItins; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. public: - ARMBaseTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ARMBaseTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -69,8 +69,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; public: - ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ARMTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -108,8 +109,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { // Either Thumb1FrameLowering or ARMFrameLowering. 
OwningPtr<ARMFrameLowering> FrameLowering; public: - ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ThumbTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index d9a5fa2..14d35ba 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -7,16 +7,15 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -30,7 +29,7 @@ using namespace llvm; namespace { -class ARMBaseAsmLexer : public TargetAsmLexer { +class ARMBaseAsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; const AsmToken &lexDefinite() { @@ -43,7 +42,7 @@ protected: rmap_ty RegisterMap; - void InitRegisterMap(const TargetRegisterInfo *info) { + void InitRegisterMap(const MCRegisterInfo *info) { unsigned numRegs = info->getNumRegs(); for (unsigned i = 0; i < numRegs; ++i) { @@ -77,33 +76,23 @@ protected: } public: ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : MCTargetAsmLexer(T), AsmInfo(MAI) { } }; class ARMAsmLexer : public ARMBaseAsmLexer { public: - ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) : ARMBaseAsmLexer(T, MAI) { - std::string tripleString("arm-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; class ThumbAsmLexer : public ARMBaseAsmLexer { public: - ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI) : ARMBaseAsmLexer(T, MAI) { - std::string tripleString("thumb-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; @@ -149,6 +138,6 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() { } extern "C" void LLVMInitializeARMAsmLexer() { - RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); - RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); + RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); } diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a474127..24f15b4 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,11 +7,9 @@ 
// //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMMCExpr.h" -#include "ARMBaseRegisterInfo.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -20,12 +18,17 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -37,49 +40,65 @@ namespace { class ARMOperand; -class ARMAsmParser : public TargetAsmParser { +class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + struct { + ARMCC::CondCodes Cond; // Condition for IT block. + unsigned Mask:4; // Condition mask for instructions. + // Starting at first 1 (from lsb). + // '1' condition as indicated in IT. + // '0' inverse of condition (else). + // Count of instructions in IT block is + // 4 - trailingzeroes(mask) + + bool FirstCond; // Explicit flag for when we're parsing the + // First instruction in the IT block. It's + // implied in the mask, so needs special + // handling. + + unsigned CurPosition; // Current position in parsing of IT + // block. In range [0,3]. Initialized + // according to count of instructions in block. + // ~0U if no active IT block. + } ITState; + bool inITBlock() { return ITState.CurPosition != ~0U;} + void forwardITPosition() { + if (!inITBlock()) return; + // Move to the next instruction in the IT block, if there is one. If not, + // mark the block as done. + unsigned TZ = CountTrailingZeros_32(ITState.Mask); + if (++ITState.CurPosition == 5 - TZ) + ITState.CurPosition = ~0U; // Done with the IT block after this. 
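// A concrete reading of the IT mask arithmetic above (annotation in the
// surrounding C++ comment style, not part of the patch): the 4-bit mask is
// read from the LSB, and the lowest set bit acts as the stop bit, so the
// block covers 4 - CountTrailingZeros_32(mask) instructions. For example:
//   mask = 0b0001  ->  TZ = 0  ->  4 conditional instructions
//   mask = 0b0100  ->  TZ = 2  ->  2 conditional instructions
//   mask = 0b1000  ->  TZ = 3  ->  1 conditional instruction
// forwardITPosition() advances CurPosition after each instruction and marks
// the block finished (CurPosition = ~0U) once ++CurPosition reaches 5 - TZ.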
+ } + + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - int TryParseRegister(); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); - int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &, - ARMII::AddrMode AddrMode); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); - bool ParsePrefix(ARMMCExpr::VariantKind &RefKind); - const MCExpr *ApplyPrefixToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind Variant); - - - bool ParseMemoryOffsetReg(bool &Negative, - bool &OffsetRegShifted, - enum ARM_AM::ShiftOpc &ShiftType, - const MCExpr *&ShiftAmount, - const MCExpr *&Offset, - bool &OffsetIsReg, - int &OffsetRegNum, - SMLoc &E); - bool ParseShift(enum ARM_AM::ShiftOpc &St, - const MCExpr *&ShiftAmount, SMLoc &E); - bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool ParseDirectiveThumb(SMLoc L); - bool ParseDirectiveThumbFunc(SMLoc L); - bool ParseDirectiveCode(SMLoc L); - bool ParseDirectiveSyntax(SMLoc L); - - bool MatchAndEmitInstruction(SMLoc IDLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); - void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + int tryParseRegister(); + bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); + int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); + bool parsePrefix(ARMMCExpr::VariantKind &RefKind); + bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType, + unsigned &ShiftAmount); + bool parseDirectiveWord(unsigned Size, SMLoc L); + bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveThumbFunc(SMLoc L); + bool parseDirectiveCode(SMLoc L); + bool parseDirectiveSyntax(SMLoc L); + + StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, + bool &CarrySetting, unsigned &ProcessorIMod, + StringRef &ITMask); + void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); bool isThumb() const { @@ -89,10 +108,22 @@ class ARMAsmParser : public TargetAsmParser { bool isThumbOne() const { return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0; } + bool isThumbTwo() const { + return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2); + } + bool hasV6Ops() const { + return STI.getFeatureBits() & ARM::HasV6Ops; + } + bool hasV7Ops() const { + return STI.getFeatureBits() & ARM::HasV7Ops; + } void SwitchMode() { unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } + bool isMClass() const { + return STI.getFeatureBits() & ARM::FeatureMClass; + } /// @name Auto-generated Match Functions /// { @@ -102,43 +133,108 @@ class ARMAsmParser : public TargetAsmParser { /// } - OperandMatchResultTy tryParseCoprocNumOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseCoprocRegOperand( + OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&); + 
OperandMatchResultTy parseCoprocNumOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemBarrierOptOperand( + OperandMatchResultTy parseCoprocRegOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseProcIFlagsOperand( + OperandMatchResultTy parseCoprocOptionOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMSRMaskOperand( + OperandMatchResultTy parseMemBarrierOptOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemMode2Operand( + OperandMatchResultTy parseProcIFlagsOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemMode3Operand( + OperandMatchResultTy parseMSRMaskOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O, + StringRef Op, int Low, int High); + OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + return parsePKHImm(O, "lsl", 0, 31); + } + OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + return parsePKHImm(O, "asr", 1, 32); + } + OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); // Asm Match Converter Methods - bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStrdPre(MCInst &Inst, unsigned Opcode, + 
const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + + bool validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + void processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + bool shouldOmitCCOutOperand(StringRef Mnemonic, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); public: + enum ARMMatchResultTy { + Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresNotITBlock, + Match_RequiresV6, + Match_RequiresThumb2 + }; + ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : TargetAsmParser(), STI(_STI), Parser(_Parser) { + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + + // Not in an ITBlock to start with. + ITState.CurPosition = ~0U; } - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - virtual bool ParseDirective(AsmToken DirectiveID); + // Implementation of the MCTargetAsmParser interface: + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseDirective(AsmToken DirectiveID); + + unsigned checkTargetMatchPredicate(MCInst &Inst); + + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); }; } // end anonymous namespace @@ -148,22 +244,30 @@ namespace { /// instruction. class ARMOperand : public MCParsedAsmOperand { enum KindTy { - CondCode, - CCOut, - CoprocNum, - CoprocReg, - Immediate, - MemBarrierOpt, - Memory, - MSRMask, - ProcIFlags, - Register, - RegisterList, - DPRRegisterList, - SPRRegisterList, - ShiftedRegister, - Shifter, - Token + k_CondCode, + k_CCOut, + k_ITCondMask, + k_CoprocNum, + k_CoprocReg, + k_CoprocOption, + k_Immediate, + k_FPImmediate, + k_MemBarrierOpt, + k_Memory, + k_PostIndexRegister, + k_MSRMask, + k_ProcIFlags, + k_VectorIndex, + k_Register, + k_RegisterList, + k_DPRRegisterList, + k_SPRRegisterList, + k_ShiftedRegister, + k_ShiftedImmediate, + k_ShifterImmediate, + k_RotateImmediate, + k_BitfieldDescriptor, + k_Token } Kind; SMLoc StartLoc, EndLoc; @@ -175,12 +279,20 @@ class ARMOperand : public MCParsedAsmOperand { } CC; struct { - ARM_MB::MemBOpt Val; - } MBOpt; + unsigned Val; + } Cop; struct { unsigned Val; - } Cop; + } CoprocOption; + + struct { + unsigned Mask:4; + } ITMask; + + struct { + ARM_MB::MemBOpt Val; + } MBOpt; struct { ARM_PROC::IFlags Val; @@ -200,37 +312,60 @@ class ARMOperand : public MCParsedAsmOperand { } Reg; struct { + unsigned Val; + } VectorIndex; + + struct { const MCExpr *Val; } Imm; + struct { + unsigned Val; // encoded 8-bit representation + } FPImm; + /// Combined record for all forms of ARM address expressions. struct { - ARMII::AddrMode AddrMode; unsigned BaseRegNum; - union { - unsigned RegNum; ///< Offset register num, when OffsetIsReg. - const MCExpr *Value; ///< Offset value, when !OffsetIsReg. 
- } Offset; - const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true - unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true - unsigned Preindexed : 1; - unsigned Postindexed : 1; - unsigned OffsetIsReg : 1; - unsigned Negative : 1; // only used when OffsetIsReg is true - unsigned Writeback : 1; - } Mem; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + } Memory; struct { + unsigned RegNum; + bool isAdd; ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + } PostIdxReg; + + struct { + bool isASR; unsigned Imm; - } Shift; + } ShifterImm; struct { ARM_AM::ShiftOpc ShiftTy; unsigned SrcReg; unsigned ShiftReg; unsigned ShiftImm; - } ShiftedReg; + } RegShiftedReg; + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + } RegShiftedImm; + struct { + unsigned Imm; + } RotImm; + struct { + unsigned LSB; + unsigned Width; + } Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -240,45 +375,69 @@ public: StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { - case CondCode: + case k_CondCode: CC = o.CC; break; - case Token: + case k_ITCondMask: + ITMask = o.ITMask; + break; + case k_Token: Tok = o.Tok; break; - case CCOut: - case Register: + case k_CCOut: + case k_Register: Reg = o.Reg; break; - case RegisterList: - case DPRRegisterList: - case SPRRegisterList: + case k_RegisterList: + case k_DPRRegisterList: + case k_SPRRegisterList: Registers = o.Registers; break; - case CoprocNum: - case CoprocReg: + case k_CoprocNum: + case k_CoprocReg: Cop = o.Cop; break; - case Immediate: + case k_CoprocOption: + CoprocOption = o.CoprocOption; + break; + case k_Immediate: Imm = o.Imm; break; - case MemBarrierOpt: + case k_FPImmediate: + FPImm = o.FPImm; + break; + case k_MemBarrierOpt: MBOpt = o.MBOpt; break; - case Memory: - Mem = o.Mem; + case k_Memory: + Memory = o.Memory; + break; + case k_PostIndexRegister: + PostIdxReg = o.PostIdxReg; break; - case MSRMask: + case k_MSRMask: MMask = o.MMask; break; - case ProcIFlags: + case k_ProcIFlags: IFlags = o.IFlags; break; - case Shifter: - Shift = o.Shift; + case k_ShifterImmediate: + ShifterImm = o.ShifterImm; + break; + case k_ShiftedRegister: + RegShiftedReg = o.RegShiftedReg; + break; + case k_ShiftedImmediate: + RegShiftedImm = o.RegShiftedImm; break; - case ShiftedRegister: - ShiftedReg = o.ShiftedReg; + case k_RotateImmediate: + RotImm = o.RotImm; + break; + case k_BitfieldDescriptor: + Bitfield = o.Bitfield; + break; + case k_VectorIndex: + VectorIndex = o.VectorIndex; break; } } @@ -289,94 +448,96 @@ public: SMLoc getEndLoc() const { return EndLoc; } ARMCC::CondCodes getCondCode() const { - assert(Kind == CondCode && "Invalid access!"); + assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; } unsigned getCoproc() const { - assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!"); + assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!"); return Cop.Val; } StringRef getToken() const { - assert(Kind == Token && "Invalid access!"); + assert(Kind == 
k_Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } unsigned getReg() const { - assert((Kind == Register || Kind == CCOut) && "Invalid access!"); + assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!"); return Reg.RegNum; } const SmallVectorImpl<unsigned> &getRegList() const { - assert((Kind == RegisterList || Kind == DPRRegisterList || - Kind == SPRRegisterList) && "Invalid access!"); + assert((Kind == k_RegisterList || Kind == k_DPRRegisterList || + Kind == k_SPRRegisterList) && "Invalid access!"); return Registers; } const MCExpr *getImm() const { - assert(Kind == Immediate && "Invalid access!"); + assert(Kind == k_Immediate && "Invalid access!"); return Imm.Val; } + unsigned getFPImm() const { + assert(Kind == k_FPImmediate && "Invalid access!"); + return FPImm.Val; + } + + unsigned getVectorIndex() const { + assert(Kind == k_VectorIndex && "Invalid access!"); + return VectorIndex.Val; + } + ARM_MB::MemBOpt getMemBarrierOpt() const { - assert(Kind == MemBarrierOpt && "Invalid access!"); + assert(Kind == k_MemBarrierOpt && "Invalid access!"); return MBOpt.Val; } ARM_PROC::IFlags getProcIFlags() const { - assert(Kind == ProcIFlags && "Invalid access!"); + assert(Kind == k_ProcIFlags && "Invalid access!"); return IFlags.Val; } unsigned getMSRMask() const { - assert(Kind == MSRMask && "Invalid access!"); + assert(Kind == k_MSRMask && "Invalid access!"); return MMask.Val; } - /// @name Memory Operand Accessors - /// @{ - ARMII::AddrMode getMemAddrMode() const { - return Mem.AddrMode; - } - unsigned getMemBaseRegNum() const { - return Mem.BaseRegNum; - } - unsigned getMemOffsetRegNum() const { - assert(Mem.OffsetIsReg && "Invalid access!"); - return Mem.Offset.RegNum; - } - const MCExpr *getMemOffset() const { - assert(!Mem.OffsetIsReg && "Invalid access!"); - return Mem.Offset.Value; - } - unsigned getMemOffsetRegShifted() const { - assert(Mem.OffsetIsReg && "Invalid access!"); - return Mem.OffsetRegShifted; + bool isCoprocNum() const { return Kind == k_CoprocNum; } + bool isCoprocReg() const { return Kind == k_CoprocReg; } + bool isCoprocOption() const { return Kind == k_CoprocOption; } + bool isCondCode() const { return Kind == k_CondCode; } + bool isCCOut() const { return Kind == k_CCOut; } + bool isITMask() const { return Kind == k_ITCondMask; } + bool isITCondCode() const { return Kind == k_CondCode; } + bool isImm() const { return Kind == k_Immediate; } + bool isFPImm() const { return Kind == k_FPImmediate; } + bool isImm8s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } - const MCExpr *getMemShiftAmount() const { - assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); - return Mem.ShiftAmount; + bool isImm0_1020s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= 0 && Value <= 1020; } - enum ARM_AM::ShiftOpc getMemShiftType() const { - assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); - return Mem.ShiftType; + bool isImm0_508s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= 0 && Value <= 
508; } - bool getMemPreindexed() const { return Mem.Preindexed; } - bool getMemPostindexed() const { return Mem.Postindexed; } - bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; } - bool getMemNegative() const { return Mem.Negative; } - bool getMemWriteback() const { return Mem.Writeback; } - - /// @} - - bool isCoprocNum() const { return Kind == CoprocNum; } - bool isCoprocReg() const { return Kind == CoprocReg; } - bool isCondCode() const { return Kind == CondCode; } - bool isCCOut() const { return Kind == CCOut; } - bool isImm() const { return Kind == Immediate; } bool isImm0_255() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -384,7 +545,7 @@ public: return Value >= 0 && Value < 256; } bool isImm0_7() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -392,130 +553,365 @@ public: return Value >= 0 && Value < 8; } bool isImm0_15() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 16; } + bool isImm0_31() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 32; + } + bool isImm1_16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 17; + } + bool isImm1_32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 33; + } bool isImm0_65535() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } - bool isT2SOImm() const { - if (Kind != Immediate) + bool isImm0_65535Expr() const { + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; + // If it's not a constant expression, it'll generate a fixup and be + // handled later. 
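[Editor's aside, not part of the diff: isImm0_65535Expr above accepts any non-constant expression on the assumption that it will be resolved by a fixup later, and range-checks only constants. A minimal sketch of that acceptance pattern, with std::optional standing in for "dyn_cast<MCConstantExpr> succeeded"; the helper name is hypothetical.]

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Mirrors the isImm0_65535Expr logic: defer symbolic values to a fixup,
    // range-check constants.
    static bool isImm0_65535ExprLike(std::optional<int64_t> ConstVal) {
      if (!ConstVal)                    // not a constant: leave it to a fixup
        return true;
      return *ConstVal >= 0 && *ConstVal < 65536;
    }

    int main() {
      assert(isImm0_65535ExprLike(std::nullopt));  // e.g. a symbol reference
      assert(isImm0_65535ExprLike(1234));
      assert(!isImm0_65535ExprLike(70000));
    }

[End of aside; the diff continues.]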
+ if (!CE) return true; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(Value) != -1; + return Value >= 0 && Value < 65536; } - bool isReg() const { return Kind == Register; } - bool isRegList() const { return Kind == RegisterList; } - bool isDPRRegList() const { return Kind == DPRRegisterList; } - bool isSPRRegList() const { return Kind == SPRRegisterList; } - bool isToken() const { return Kind == Token; } - bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } - bool isMemory() const { return Kind == Memory; } - bool isShifter() const { return Kind == Shifter; } - bool isShiftedReg() const { return Kind == ShiftedRegister; } - bool isMemMode2() const { - if (getMemAddrMode() != ARMII::AddrMode2) + bool isImm24bit() const { + if (Kind != k_Immediate) return false; - - if (getMemOffsetIsReg()) - return true; - - if (getMemNegative() && - !(getMemPostindexed() || getMemPreindexed())) + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value <= 0xffffff; + } + bool isImmThumbSR() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - - // The offset must be in the range 0-4095 (imm12). - if (Value > 4095 || Value < -4095) + return Value > 0 && Value < 33; + } + bool isPKHLSLImm() const { + if (Kind != k_Immediate) return false; - - return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 32; } - bool isMemMode3() const { - if (getMemAddrMode() != ARMII::AddrMode3) + bool isPKHASRImm() const { + if (Kind != k_Immediate) return false; - - if (getMemOffsetIsReg()) { - if (getMemOffsetRegShifted()) - return false; // No shift with offset reg allowed - return true; - } - - if (getMemNegative() && - !(getMemPostindexed() || getMemPreindexed())) + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isARMSOImm() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - - // The offset must be in the range 0-255 (imm8). - if (Value > 255 || Value < -255) + return ARM_AM::getSOImmVal(Value) != -1; + } + bool isT2SOImm() const { + if (Kind != k_Immediate) return false; - - return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(Value) != -1; } - bool isMemMode5() const { - if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || - getMemNegative()) + bool isSetEndImm() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; - - // The offset must be a multiple of 4 in the range 0-1020. 
int64_t Value = CE->getValue(); - return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); - } - bool isMemMode7() const { - if (!isMemory() || - getMemPreindexed() || - getMemPostindexed() || - getMemOffsetIsReg() || - getMemNegative() || - getMemWriteback()) + return Value == 1 || Value == 0; + } + bool isReg() const { return Kind == k_Register; } + bool isRegList() const { return Kind == k_RegisterList; } + bool isDPRRegList() const { return Kind == k_DPRRegisterList; } + bool isSPRRegList() const { return Kind == k_SPRRegisterList; } + bool isToken() const { return Kind == k_Token; } + bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } + bool isMemory() const { return Kind == k_Memory; } + bool isShifterImm() const { return Kind == k_ShifterImmediate; } + bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } + bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } + bool isRotImm() const { return Kind == k_RotateImmediate; } + bool isBitfield() const { return Kind == k_BitfieldDescriptor; } + bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } + bool isPostIdxReg() const { + return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift; + } + bool isMemNoOffset(bool alignOK = false) const { + if (!isMemory()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + // No offset of any kind. + return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && + (alignOK || Memory.Alignment == 0); + } + bool isAlignedMemory() const { + return isMemNoOffset(true); + } + bool isAddrMode2() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return true; + // Immediate offset in range [-4095, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -4096 && Val < 4096; + } + bool isAM2OffsetImm() const { + if (Kind != k_Immediate) + return false; + // Immediate offset in range [-4095, 4095]. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; - - if (CE->getValue()) + int64_t Val = CE->getValue(); + return Val > -4096 && Val < 4096; + } + bool isAddrMode3() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // No shifts are legal for AM3. + if (Memory.ShiftType != ARM_AM::no_shift) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return true; + // Immediate offset in range [-255, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -256 && Val < 256; + } + bool isAM3Offset() const { + if (Kind != k_Immediate && Kind != k_PostIndexRegister) + return false; + if (Kind == k_PostIndexRegister) + return PostIdxReg.ShiftTy == ARM_AM::no_shift; + // Immediate offset in range [-255, 255]. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Val = CE->getValue(); + // Special case, #-0 is INT32_MIN. + return (Val > -256 && Val < 256) || Val == INT32_MIN; + } + bool isAddrMode5() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return false; + // Immediate offset in range [-1020, 1020] and a multiple of 4. 
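[Editor's aside, not part of the diff: several of the predicates above (isAddrMode5, isMemImm8s4Offset, and friends) apply the same "multiple of 4 in [-1020, 1020]" test to a constant offset. A standalone sketch of that check, ignoring the INT32_MIN ("#-0") special case the diff also allows; the helper name is hypothetical.]

    #include <cassert>
    #include <cstdint>

    static bool isOffsetImm8s4(int64_t Val) {
      // Same shape as the predicates in the diff: word-aligned and in range.
      return (Val & 3) == 0 && Val >= -1020 && Val <= 1020;
    }

    int main() {
      assert(isOffsetImm8s4(-1020));
      assert(isOffsetImm8s4(8));
      assert(!isOffsetImm8s4(6));      // not a multiple of 4
      assert(!isOffsetImm8s4(1024));   // out of range
    }

[End of aside; the diff continues.]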
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || + Val == INT32_MIN; + } + bool isMemTBB() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) + return false; + return true; + } + bool isMemTBH() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 || + Memory.Alignment != 0 ) + return false; + return true; + } + bool isMemRegOffset() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0) return false; - return true; } - bool isMemModeRegThumb() const { - if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) + bool isT2MemRegOffset() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.Alignment != 0) + return false; + // Only lsl #{0, 1, 2, 3} allowed. + if (Memory.ShiftType == ARM_AM::no_shift) + return true; + if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3) return false; return true; } - bool isMemModeImmThumb() const { - if (!isMemory() || getMemOffsetIsReg() || getMemWriteback()) + bool isMemThumbRR() const { + // Thumb reg+reg addressing is simple. Just two registers, a base and + // an offset. No shifts, negations or any other complicating factors. + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) + return false; + return isARMLowRegister(Memory.BaseRegNum) && + (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum)); + } + bool isMemThumbRIs4() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 124]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 124 && (Val % 4) == 0; + } + bool isMemThumbRIs2() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 62]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 62 && (Val % 2) == 0; + } + bool isMemThumbRIs1() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset in range [0, 31]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 31; + } + bool isMemThumbSPI() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 1020]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 1020 && (Val % 4) == 0; + } + bool isMemImm8s4Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset a multiple of 4 in range [-1020, 1020]. 
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= -1020 && Val <= 1020 && (Val & 3) == 0; + } + bool isMemImm0_1020s4Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset a multiple of 4 in range [0, 1020]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 1020 && (Val & 3) == 0; + } + bool isMemImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-255, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val == INT32_MIN) || (Val > -256 && Val < 256); + } + bool isMemPosImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [0, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val < 256; + } + bool isMemNegImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-255, -1]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -256 && Val < 0; + } + bool isMemUImm12Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + return true; + + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Immediate offset in range [0, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= 0 && Val < 4096); + } + bool isMemImm12Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-4095, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + } + bool isPostIdxImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Val = CE->getValue(); + return (Val > -256 && Val < 256) || (Val == INT32_MIN); + } + bool isPostIdxImm8s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; + int64_t Val = CE->getValue(); + return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) || + (Val == INT32_MIN); + } - // The offset must be a multiple of 4 in the range 0-124. 
- uint64_t Value = CE->getValue(); - return ((Value & 0x3) == 0 && Value <= 124); + bool isMSRMask() const { return Kind == k_MSRMask; } + bool isProcIFlags() const { return Kind == k_ProcIFlags; } + + bool isVectorIndex8() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 8; + } + bool isVectorIndex16() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 4; } - bool isMSRMask() const { return Kind == MSRMask; } - bool isProcIFlags() const { return Kind == ProcIFlags; } + bool isVectorIndex32() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 2; + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. @@ -544,6 +940,21 @@ public: Inst.addOperand(MCOperand::CreateImm(getCoproc())); } + void addCoprocOptionOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val)); + } + + void addITMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(ITMask.Mask)); + } + + void addITCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + } + void addCCOutOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -554,22 +965,27 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } - void addShiftedRegOperands(MCInst &Inst, unsigned N) const { + void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!"); - assert((ShiftedReg.ShiftReg == 0 || - ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) && - "Invalid shifted register operand!"); - Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg)); - Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg)); + assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!"); + Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); + Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( - ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm))); + ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm))); } - void addShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); + Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); Inst.addOperand(MCOperand::CreateImm( - ARM_AM::getSORegOpc(Shift.ShiftTy, 0))); + ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); + } + + void addShifterImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) | + ShifterImm.Imm)); } void addRegListOperands(MCInst &Inst, unsigned N) const { @@ -588,11 +1004,57 @@ public: addRegListOperands(Inst, N); } + void addRotImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Encoded as val>>3. The printer handles display as 8, 16, 24. 
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3)); + } + + void addBitfieldOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Munge the lsb/width into a bitfield mask. + unsigned lsb = Bitfield.LSB; + unsigned width = Bitfield.Width; + // Make a 32-bit mask w/ the referenced bits clear and all other bits set. + uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >> + (32 - (lsb + width))); + Inst.addOperand(MCOperand::CreateImm(Mask)); + } + void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addFPImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getFPImm())); + } + + void addImm8s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // FIXME: We really want to scale the value here, but the LDRD/STRD + // instruction don't encode operands that way yet. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); + } + + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); + } + void addImm0_255Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); @@ -608,137 +1070,344 @@ public: addExpr(Inst, getImm()); } + void addImm0_31Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm1_16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate-1, and we store in the instruction + // the bits as encoded, so subtract off one here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addImm1_32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate-1, and we store in the instruction + // the bits as encoded, so subtract off one here. 
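[Editor's aside, not part of the diff: addBitfieldOperands above "munges" an lsb/width pair into a 32-bit mask with the referenced bits clear and all other bits set. A standalone recomputation of that exact expression with a worked check; bitfieldMask is a hypothetical name used only for this sketch.]

    #include <cassert>
    #include <cstdint>

    static uint32_t bitfieldMask(unsigned lsb, unsigned width) {
      // Same expression as the converter: set bits [lsb, lsb+width), then invert.
      uint32_t Set = ((uint32_t)0xffffffff >> lsb) << (32 - width) >>
                     (32 - (lsb + width));
      return ~Set;
    }

    int main() {
      assert(bitfieldMask(8, 4) == 0xFFFFF0FF);  // bits 8..11 cleared
      assert(bitfieldMask(0, 1) == 0xFFFFFFFE);  // bit 0 cleared
    }

[End of aside; the diff continues.]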
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + void addImm0_65535Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm24bitOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImmThumbSROperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate, except for 32, which encodes as + // zero. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Imm = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm))); + } + + void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addPKHASRImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // An ASR value of 32 encodes as 0, so that's how we want to add it to + // the instruction as well. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val)); + } + + void addARMSOImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addT2SOImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addSetEndImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } - void addMemMode7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemMode7() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - (void)CE; - assert((CE || CE->getValue() == 0) && - "No offset operand support in mode 7"); + void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); } - void addMemMode2Operands(MCInst &Inst, unsigned N) const { - assert(isMemMode2() && "Invalid mode or number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Memory.Alignment)); + } - if (getMemOffsetIsReg()) { - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + void addAddrMode2Operands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + if (!Memory.OffsetRegNum) { + ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); + } else { + // For register offset, we encode the shift type and negation flag + // here. + Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); + } + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; - ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift; - int64_t ShiftAmount = 0; + void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant AM2OffsetImm operand!"); + int32_t Val = CE->getValue(); + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - if (getMemOffsetRegShifted()) { - ShOpc = getMemShiftType(); - const MCConstantExpr *CE = - dyn_cast<MCConstantExpr>(getMemShiftAmount()); - ShiftAmount = CE->getValue(); - } + void addAddrMode3Operands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + if (!Memory.OffsetRegNum) { + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM3Opc(AddSub, Val); + } else { + // For register offset, we encode the shift type and negation flag + // here. + Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0); + } + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount, - ShOpc, IdxMode))); + void addAM3OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + if (Kind == k_PostIndexRegister) { + int32_t Val = + ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); return; } - // Create a operand placeholder to always yield the same number of operands. + // Constant offset. + const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm()); + int32_t Val = CE->getValue(); + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM3Opc(AddSub, Val); Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? 
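[Editor's aside, not part of the diff: the addAddrMode2/AddrMode3 and AM2/AM3 offset converters above all normalize the signed offset the same way before handing it to the ARM_AM::getAMxOpc helpers: the sign selects add vs. sub, INT32_MIN is the parser's marker for "#-0" (subtract, zero magnitude), and the magnitude is taken afterwards. A minimal standalone sketch; splitOffset is a hypothetical helper, not the LLVM API.]

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Returns {isSub, magnitude} for a signed offset immediate.
    static std::pair<bool, uint32_t> splitOffset(int32_t Val) {
      bool isSub = Val < 0;              // also true for INT32_MIN, i.e. "#-0"
      if (Val == INT32_MIN) Val = 0;     // negative zero: keep sub, drop value
      if (Val < 0) Val = -Val;
      return {isSub, static_cast<uint32_t>(Val)};
    }

    int main() {
      assert(!splitOffset(4).first && splitOffset(4).second == 4);
      assert(splitOffset(-4).first && splitOffset(-4).second == 4);
      assert(splitOffset(INT32_MIN).first && splitOffset(INT32_MIN).second == 0);
    }

[End of aside; the diff continues.]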
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 2 offset operand!"); - int64_t Offset = CE->getValue(); + void addAddrMode5Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // The lower two bits are always zero and as such are not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM5Opc(AddSub, Val); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add, - Offset, ARM_AM::no_shift, IdxMode))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub, - -Offset, ARM_AM::no_shift, IdxMode))); + void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // The lower two bits are always zero and as such are not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const { + addMemImm8OffsetOperands(Inst, N); + } + + void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const { + addMemImm8OffsetOperands(Inst, N); } - void addMemMode3Operands(MCInst &Inst, unsigned N) const { - assert(isMemMode3() && "Invalid mode or number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If this is an immediate, it's a label reference. + if (Kind == k_Immediate) { + addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } - if (getMemOffsetIsReg()) { - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + // Otherwise, it's a normal memory reg+offset. + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0, - IdxMode))); + void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If this is an immediate, it's a label reference. + if (Kind == k_Immediate) { + addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::CreateImm(0)); return; } - // Create a operand placeholder to always yield the same number of operands. 
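[Editor's aside, not part of the diff: addAddrMode5Operands above notes that the low two bits of the byte offset are always zero and are not encoded, so the converter divides by 4 and passes only an add/sub flag plus the scaled magnitude to ARM_AM::getAM5Opc. A standalone sketch of that scaling step; splitAM5Offset is a hypothetical stand-in, not the LLVM helper itself.]

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Returns {isSub, scaled magnitude} for a word-aligned byte offset.
    static std::pair<bool, uint32_t> splitAM5Offset(int32_t Bytes) {
      int32_t Val = Bytes / 4;           // drop the always-zero low two bits
      bool isSub = Val < 0;
      if (Val < 0) Val = -Val;
      return {isSub, static_cast<uint32_t>(Val)};
    }

    int main() {
      auto P = splitAM5Offset(-8);
      assert(P.first && P.second == 2);      // "#-8" -> sub, encoded 2
      auto Q = splitAM5Offset(1020);
      assert(!Q.first && Q.second == 255);   // largest positive offset
    }

[End of aside; the diff continues.]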
- Inst.addOperand(MCOperand::CreateReg(0)); + // Otherwise, it's a normal memory reg+offset. + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 3 offset operand!"); - int64_t Offset = CE->getValue(); + void addMemTBBOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + } - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add, - Offset, IdxMode))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub, - -Offset, IdxMode))); + void addMemTBHOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); } - void addMemMode5Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemMode5() && "Invalid number of operands!"); + void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - assert(!getMemOffsetIsReg() && "Invalid mode 5 operand"); + void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 5 offset operand!"); + void addMemThumbRROperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + } - // The MCInst offset operand doesn't include the low two bits (like - // the instruction encoding). - int64_t Offset = CE->getValue() / 4; - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, - Offset))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, - -Offset))); + void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? 
(Memory.OffsetImm->getValue() / 4) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); } - void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); } - void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode offset operand!"); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant post-idx-imm8 operand!"); + int Imm = CE->getValue(); + bool isAdd = Imm >= 0; + if (Imm == INT32_MIN) Imm = 0; + Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant post-idx-imm8s4 operand!"); + int Imm = CE->getValue(); + bool isAdd = Imm >= 0; + if (Imm == INT32_MIN) Imm = 0; + // Immediate is scaled by 4. + Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addPostIdxRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd)); + } + + void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + // The sign, shift type, and shift amount are encoded in a single operand + // using the AM2 encoding helpers. + ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? 
ARM_AM::add : ARM_AM::sub; + unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm, + PostIdxReg.ShiftTy); + Inst.addOperand(MCOperand::CreateImm(Imm)); } void addMSRMaskOperands(MCInst &Inst, unsigned N) const { @@ -751,10 +1420,33 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndex16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndex32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + virtual void print(raw_ostream &OS) const; + static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) { + ARMOperand *Op = new ARMOperand(k_ITCondMask); + Op->ITMask.Mask = Mask; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - ARMOperand *Op = new ARMOperand(CondCode); + ARMOperand *Op = new ARMOperand(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -762,7 +1454,7 @@ public: } static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(CoprocNum); + ARMOperand *Op = new ARMOperand(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -770,15 +1462,23 @@ public: } static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(CoprocReg); + ARMOperand *Op = new ARMOperand(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } + static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_CoprocOption); + Op->Cop.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { - ARMOperand *Op = new ARMOperand(CCOut); + ARMOperand *Op = new ARMOperand(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; @@ -786,7 +1486,7 @@ public: } static ARMOperand *CreateToken(StringRef Str, SMLoc S) { - ARMOperand *Op = new ARMOperand(Token); + ARMOperand *Op = new ARMOperand(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -795,7 +1495,7 @@ public: } static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Register); + ARMOperand *Op = new ARMOperand(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; @@ -807,20 +1507,52 @@ public: unsigned ShiftReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(ShiftedRegister); - Op->ShiftedReg.ShiftTy = ShTy; - Op->ShiftedReg.SrcReg = SrcReg; - Op->ShiftedReg.ShiftReg = ShiftReg; - Op->ShiftedReg.ShiftImm = ShiftImm; + ARMOperand *Op = new ARMOperand(k_ShiftedRegister); + Op->RegShiftedReg.ShiftTy = ShTy; + Op->RegShiftedReg.SrcReg = SrcReg; + Op->RegShiftedReg.ShiftReg = ShiftReg; + Op->RegShiftedReg.ShiftImm = ShiftImm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, + unsigned SrcReg, + unsigned ShiftImm, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_ShiftedImmediate); + Op->RegShiftedImm.ShiftTy = ShTy; + Op->RegShiftedImm.SrcReg = SrcReg; + Op->RegShiftedImm.ShiftImm = 
ShiftImm; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy, + static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Shifter); - Op->Shift.ShiftTy = ShTy; + ARMOperand *Op = new ARMOperand(k_ShifterImmediate); + Op->ShifterImm.isASR = isASR; + Op->ShifterImm.Imm = Imm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_RotateImmediate); + Op->RotImm.Imm = Imm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor); + Op->Bitfield.LSB = LSB; + Op->Bitfield.Width = Width; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -829,12 +1561,13 @@ public: static ARMOperand * CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, SMLoc StartLoc, SMLoc EndLoc) { - KindTy Kind = RegisterList; + KindTy Kind = k_RegisterList; - if (ARM::DPRRegClass.contains(Regs.front().first)) - Kind = DPRRegisterList; - else if (ARM::SPRRegClass.contains(Regs.front().first)) - Kind = SPRRegisterList; + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first)) + Kind = k_DPRRegisterList; + else if (ARMMCRegisterClasses[ARM::SPRRegClassID]. + contains(Regs.front().first)) + Kind = k_SPRRegisterList; ARMOperand *Op = new ARMOperand(Kind); for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator @@ -846,55 +1579,68 @@ public: return Op; } + static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, + MCContext &Ctx) { + ARMOperand *Op = new ARMOperand(k_VectorIndex); + Op->VectorIndex.Val = Idx; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Immediate); + ARMOperand *Op = new ARMOperand(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum, - bool OffsetIsReg, const MCExpr *Offset, - int OffsetRegNum, bool OffsetRegShifted, - enum ARM_AM::ShiftOpc ShiftType, - const MCExpr *ShiftAmount, bool Preindexed, - bool Postindexed, bool Negative, bool Writeback, + static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { + ARMOperand *Op = new ARMOperand(k_FPImmediate); + Op->FPImm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateMem(unsigned BaseRegNum, + const MCConstantExpr *OffsetImm, + unsigned OffsetRegNum, + ARM_AM::ShiftOpc ShiftType, + unsigned ShiftImm, + unsigned Alignment, + bool isNegative, SMLoc S, SMLoc E) { - assert((OffsetRegNum == -1 || OffsetIsReg) && - "OffsetRegNum must imply OffsetIsReg!"); - assert((!OffsetRegShifted || OffsetIsReg) && - "OffsetRegShifted must imply OffsetIsReg!"); - assert((Offset || OffsetIsReg) && - "Offset must exists unless register offset is used!"); - assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) && - "Cannot have shift amount without shifted register offset!"); - assert((!Offset || !OffsetIsReg) && - "Cannot have expression offset and register offset!"); - - ARMOperand *Op = new ARMOperand(Memory); - Op->Mem.AddrMode = AddrMode; - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetIsReg = OffsetIsReg; - if (OffsetIsReg) - Op->Mem.Offset.RegNum = OffsetRegNum; - else - Op->Mem.Offset.Value 
= Offset; - Op->Mem.OffsetRegShifted = OffsetRegShifted; - Op->Mem.ShiftType = ShiftType; - Op->Mem.ShiftAmount = ShiftAmount; - Op->Mem.Preindexed = Preindexed; - Op->Mem.Postindexed = Postindexed; - Op->Mem.Negative = Negative; - Op->Mem.Writeback = Writeback; + ARMOperand *Op = new ARMOperand(k_Memory); + Op->Memory.BaseRegNum = BaseRegNum; + Op->Memory.OffsetImm = OffsetImm; + Op->Memory.OffsetRegNum = OffsetRegNum; + Op->Memory.ShiftType = ShiftType; + Op->Memory.ShiftImm = ShiftImm; + Op->Memory.Alignment = Alignment; + Op->Memory.isNegative = isNegative; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd, + ARM_AM::ShiftOpc ShiftTy, + unsigned ShiftImm, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_PostIndexRegister); + Op->PostIdxReg.RegNum = RegNum; + Op->PostIdxReg.isAdd = isAdd; + Op->PostIdxReg.ShiftTy = ShiftTy; + Op->PostIdxReg.ShiftImm = ShiftImm; Op->StartLoc = S; Op->EndLoc = E; return Op; } static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - ARMOperand *Op = new ARMOperand(MemBarrierOpt); + ARMOperand *Op = new ARMOperand(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -902,7 +1648,7 @@ public: } static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - ARMOperand *Op = new ARMOperand(ProcIFlags); + ARMOperand *Op = new ARMOperand(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; @@ -910,7 +1656,7 @@ public: } static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) { - ARMOperand *Op = new ARMOperand(MSRMask); + ARMOperand *Op = new ARMOperand(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -922,53 +1668,56 @@ public: void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { - case CondCode: + case k_FPImmediate: + OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm()) + << ") >"; + break; + case k_CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; break; - case CCOut: + case k_CCOut: OS << "<ccout " << getReg() << ">"; break; - case CoprocNum: + case k_ITCondMask: { + static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)", + "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)", + "(tee)", "(eee)" }; + assert((ITMask.Mask & 0xf) == ITMask.Mask); + OS << "<it-mask " << MaskStr[ITMask.Mask] << ">"; + break; + } + case k_CoprocNum: OS << "<coprocessor number: " << getCoproc() << ">"; break; - case CoprocReg: + case k_CoprocReg: OS << "<coprocessor register: " << getCoproc() << ">"; break; - case MSRMask: + case k_CoprocOption: + OS << "<coprocessor option: " << CoprocOption.Val << ">"; + break; + case k_MSRMask: OS << "<mask: " << getMSRMask() << ">"; break; - case Immediate: + case k_Immediate: getImm()->print(OS); break; - case MemBarrierOpt: + case k_MemBarrierOpt: OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">"; break; - case Memory: + case k_Memory: OS << "<memory " - << "am:" << ARMII::AddrModeToString(getMemAddrMode()) - << " base:" << getMemBaseRegNum(); - if (getMemOffsetIsReg()) { - OS << " offset:<register " << getMemOffsetRegNum(); - if (getMemOffsetRegShifted()) { - OS << " offset-shift-type:" << getMemShiftType(); - OS << " offset-shift-amount:" << *getMemShiftAmount(); - } - } else { - OS << " offset:" << *getMemOffset(); - } - if (getMemOffsetIsReg()) - OS << " (offset-is-reg)"; - if (getMemPreindexed()) - OS << " (pre-indexed)"; - if (getMemPostindexed()) - OS << " 
(post-indexed)"; - if (getMemNegative()) - OS << " (negative)"; - if (getMemWriteback()) - OS << " (writeback)"; + << " base:" << Memory.BaseRegNum; + OS << ">"; + break; + case k_PostIndexRegister: + OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-") + << PostIdxReg.RegNum; + if (PostIdxReg.ShiftTy != ARM_AM::no_shift) + OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " " + << PostIdxReg.ShiftImm; OS << ">"; break; - case ProcIFlags: { + case k_ProcIFlags: { OS << "<ARM_PROC::"; unsigned IFlags = getProcIFlags(); for (int i=2; i >= 0; --i) @@ -977,23 +1726,38 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ">"; break; } - case Register: + case k_Register: OS << "<register " << getReg() << ">"; break; - case Shifter: - OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">"; + case k_ShifterImmediate: + OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl") + << " #" << ShifterImm.Imm << ">"; + break; + case k_ShiftedRegister: + OS << "<so_reg_reg " + << RegShiftedReg.SrcReg + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm)) + << ", " << RegShiftedReg.ShiftReg << ", " + << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm) + << ">"; break; - case ShiftedRegister: - OS << "<so_reg" - << ShiftedReg.SrcReg - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm)) - << ", " << ShiftedReg.ShiftReg << ", " - << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) + case k_ShiftedImmediate: + OS << "<so_reg_imm " + << RegShiftedImm.SrcReg + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm)) + << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm) << ">"; break; - case RegisterList: - case DPRRegisterList: - case SPRRegisterList: { + case k_RotateImmediate: + OS << "<ror " << " #" << (RotImm.Imm * 8) << ">"; + break; + case k_BitfieldDescriptor: + OS << "<bitfield " << "lsb: " << Bitfield.LSB + << ", width: " << Bitfield.Width << ">"; + break; + case k_RegisterList: + case k_DPRRegisterList: + case k_SPRRegisterList: { OS << "<register_list "; const SmallVectorImpl<unsigned> &RegList = getRegList(); @@ -1006,9 +1770,12 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ">"; break; } - case Token: + case k_Token: OS << "'" << getToken() << "'"; break; + case k_VectorIndex: + OS << "<vectorindex " << getVectorIndex() << ">"; + break; } } @@ -1021,7 +1788,7 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - RegNo = TryParseRegister(); + RegNo = tryParseRegister(); return (RegNo == (unsigned)-1); } @@ -1030,9 +1797,9 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// and if it is a register name the token is eaten and the register number is /// returned. Otherwise return -1. /// -int ARMAsmParser::TryParseRegister() { +int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) return -1; // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. @@ -1050,6 +1817,39 @@ int ARMAsmParser::TryParseRegister() { if (!RegNum) return -1; Parser.Lex(); // Eat identifier token. + +#if 0 + // Also check for an index operand. This is only legal for vector registers, + // but that'll get caught OK in operand matching, so we don't need to + // explicitly filter everything else out here. 
+ if (Parser.getTok().is(AsmToken::LBrac)) { + SMLoc SIdx = Parser.getTok().getLoc(); + Parser.Lex(); // Eat left bracket token. + + const MCExpr *ImmVal; + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), + SIdx, E, + getContext())); + } +#endif + return RegNum; } @@ -1058,7 +1858,7 @@ int ARMAsmParser::TryParseRegister() { // occurs, return -1. An irrecoverable error is one where tokens have been // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). -int ARMAsmParser::TryParseShiftRegister( +int ARMAsmParser::tryParseShiftRegister( SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); @@ -1120,7 +1920,7 @@ int ARMAsmParser::TryParseShiftRegister( return -1; } } else if (Parser.getTok().is(AsmToken::Identifier)) { - ShiftReg = TryParseRegister(); + ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); if (ShiftReg == -1) { Error (L, "expected immediate or register in shift operand"); @@ -1133,8 +1933,12 @@ int ARMAsmParser::TryParseShiftRegister( } } - Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, - ShiftReg, Imm, + if (ShiftReg && ShiftTy != ARM_AM::rrx) + Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, + ShiftReg, Imm, + S, Parser.getTok().getLoc())); + else + Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm, S, Parser.getTok().getLoc())); return 0; @@ -1148,9 +1952,9 @@ int ARMAsmParser::TryParseShiftRegister( /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. bool ARMAsmParser:: -TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - int RegNo = TryParseRegister(); + int RegNo = tryParseRegister(); if (RegNo == -1) return true; @@ -1161,6 +1965,37 @@ TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), ExclaimTok.getLoc())); Parser.Lex(); // Eat exclaim token + return false; + } + + // Also check for an index operand. This is only legal for vector registers, + // but that'll get caught OK in operand matching, so we don't need to + // explicitly filter everything else out here. + if (Parser.getTok().is(AsmToken::LBrac)) { + SMLoc SIdx = Parser.getTok().getLoc(); + Parser.Lex(); // Eat left bracket token. 
+ + const MCExpr *ImmVal; + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), + SIdx, E, + getContext())); } return false; @@ -1209,14 +2044,50 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; } -/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The +/// parseITCondCode - Try to parse a condition code for an IT instruction. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + unsigned CC = StringSwitch<unsigned>(Tok.getString()) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("cs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("cc", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC == ~0U) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the token. + + Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S)); + + return MatchOperand_Success; +} + +/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; int Num = MatchCoprocessorOperandName(Tok.getString(), 'p'); if (Num == -1) @@ -1227,14 +2098,15 @@ tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The +/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. 
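For reference, the operand names handled by parseCoprocNumOperand and parseCoprocRegOperand are of the form pN (coprocessor number) and cN (coprocessor register) with N in [0,15]. MatchCoprocessorOperandName is only declared in this hunk, so the following is a minimal standalone sketch of that kind of matching, an illustration rather than the patch's actual implementation.

// Standalone approximation of coprocessor operand-name matching: accepts
// "p0".."p15" when CoprocOp=='p' and "c0".."c15" when CoprocOp=='c',
// returning the number, or -1 on no match.  Illustrative only.
#include <cctype>
#include <cstdio>
#include <string>

static int matchCoprocName(const std::string &Name, char CoprocOp) {
  if (Name.size() < 2 || Name.size() > 3 || Name[0] != CoprocOp)
    return -1;
  int Num = 0;
  for (size_t i = 1; i < Name.size(); ++i) {
    if (!isdigit((unsigned char)Name[i]))
      return -1;
    Num = Num * 10 + (Name[i] - '0');
  }
  return Num <= 15 ? Num : -1;   // coprocessor numbers/registers are 0-15
}

int main() {
  printf("%d %d %d\n", matchCoprocName("p7", 'p'),    // 7
                       matchCoprocName("c15", 'c'),   // 15
                       matchCoprocName("p16", 'p'));  // -1 (out of range)
}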
ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c'); if (Reg == -1) @@ -1245,93 +2117,155 @@ tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// Parse a register list, return it if successful else return null. The first -/// token must be a '{' when called. -bool ARMAsmParser:: -ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LCurly) && - "Token is not a Left Curly Brace"); +/// parseCoprocOptionOperand - Try to parse an coprocessor option operand. +/// coproc_option : '{' imm0_255 '}' +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - // Read the rest of the registers in the list. - unsigned PrevRegNum = 0; - SmallVector<std::pair<unsigned, SMLoc>, 32> Registers; - - do { - bool IsRange = Parser.getTok().is(AsmToken::Minus); - Parser.Lex(); // Eat non-identifier token. - - const AsmToken &RegTok = Parser.getTok(); - SMLoc RegLoc = RegTok.getLoc(); - if (RegTok.isNot(AsmToken::Identifier)) { - Error(RegLoc, "register expected"); - return true; - } - - int RegNum = TryParseRegister(); - if (RegNum == -1) { - Error(RegLoc, "register expected"); - return true; - } - - if (IsRange) { - int Reg = PrevRegNum; - do { - ++Reg; - Registers.push_back(std::make_pair(Reg, RegLoc)); - } while (Reg != RegNum); - } else { - Registers.push_back(std::make_pair(RegNum, RegLoc)); - } - - PrevRegNum = RegNum; - } while (Parser.getTok().is(AsmToken::Comma) || - Parser.getTok().is(AsmToken::Minus)); + // If this isn't a '{', this isn't a coprocessor immediate operand. + if (Parser.getTok().isNot(AsmToken::LCurly)) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the '{' - // Process the right curly brace of the list. - const AsmToken &RCurlyTok = Parser.getTok(); - if (RCurlyTok.isNot(AsmToken::RCurly)) { - Error(RCurlyTok.getLoc(), "'}' expected"); - return true; + const MCExpr *Expr; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(Expr)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + if (!CE || CE->getValue() < 0 || CE->getValue() > 255) { + Error(Loc, "coprocessor option must be an immediate in range [0, 255]"); + return MatchOperand_ParseFail; + } + int Val = CE->getValue(); - SMLoc E = RCurlyTok.getLoc(); - Parser.Lex(); // Eat right curly brace token. - - // Verify the register list. 
- SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator - RI = Registers.begin(), RE = Registers.end(); + // Check for and consume the closing '}' + if (Parser.getTok().isNot(AsmToken::RCurly)) + return MatchOperand_ParseFail; + SMLoc E = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the '}' - unsigned HighRegNum = getARMRegisterNumbering(RI->first); - bool EmittedWarning = false; + Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E)); + return MatchOperand_Success; +} - DenseMap<unsigned, bool> RegMap; - RegMap[HighRegNum] = true; +// For register list parsing, we need to map from raw GPR register numbering +// to the enumeration values. The enumeration values aren't sorted by +// register number due to our using "sp", "lr" and "pc" as canonical names. +static unsigned getNextRegister(unsigned Reg) { + // If this is a GPR, we need to do it manually, otherwise we can rely + // on the sort ordering of the enumeration since the other reg-classes + // are sane. + if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + return Reg + 1; + switch(Reg) { + default: assert(0 && "Invalid GPR number!"); + case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2; + case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4; + case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6; + case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8; + case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10; + case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12; + case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR; + case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0; + } +} - for (++RI; RI != RE; ++RI) { - const std::pair<unsigned, SMLoc> &RegInfo = *RI; - unsigned Reg = getARMRegisterNumbering(RegInfo.first); +/// Parse a register list. +bool ARMAsmParser:: +parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + assert(Parser.getTok().is(AsmToken::LCurly) && + "Token is not a Left Curly Brace"); + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '{' token. + SMLoc RegLoc = Parser.getTok().getLoc(); - if (RegMap[Reg]) { - Error(RegInfo.second, "register duplicated in register list"); - return true; + // Check the first register in the list to see what register class + // this is a list of. + int Reg = tryParseRegister(); + if (Reg == -1) + return Error(RegLoc, "register expected"); + + MCRegisterClass *RC; + if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; + else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) + RC = &ARMMCRegisterClasses[ARM::DPRRegClassID]; + else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg)) + RC = &ARMMCRegisterClasses[ARM::SPRRegClassID]; + else + return Error(RegLoc, "invalid register in register list"); + + // The reglist instructions have at most 16 registers, so reserve + // space for that many. + SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; + // Store the first register. + Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + + // This starts immediately after the first register token in the list, + // so we can see either a comma or a minus (range separator) as a legal + // next token. + while (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::Minus)) { + if (Parser.getTok().is(AsmToken::Minus)) { + Parser.Lex(); // Eat the comma. 
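The range handling that follows relies on getNextRegister above: for GPRs the successor order is r0…r12, sp, lr, pc, because the enumeration is not sorted by raw register number. A minimal standalone sketch of expanding a range in that order (the name table below is a stand-in for the LLVM enum, not taken from the patch):

// Expand a GPR range (e.g. "r10-lr") following the canonical order
// r0..r12, sp, lr, pc, mirroring getNextRegister in the patch.
#include <cstdio>
#include <vector>

static const char *const GPROrder[] = {
  "r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","r12",
  "sp","lr","pc"
};

// Return the indices of all registers from Start to End inclusive.
static std::vector<int> expandRange(int Start, int End) {
  std::vector<int> Regs;
  for (int R = Start; ; R = (R + 1) % 16) {  // wrap pc->r0 like getNextRegister
    Regs.push_back(R);
    if (R == End) break;
  }
  return Regs;
}

int main() {
  for (int R : expandRange(10, 14))   // prints: r10 r11 r12 sp lr
    printf("%s ", GPROrder[R]);
  printf("\n");
}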
+ SMLoc EndLoc = Parser.getTok().getLoc(); + int EndReg = tryParseRegister(); + if (EndReg == -1) + return Error(EndLoc, "register expected"); + // If the register is the same as the start reg, there's nothing + // more to do. + if (Reg == EndReg) + continue; + // The register must be in the same register class as the first. + if (!RC->contains(EndReg)) + return Error(EndLoc, "invalid register in register list"); + // Ranges must go from low to high. + if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg)) + return Error(EndLoc, "bad range in register list"); + + // Add all the registers in the range to the register list. + while (Reg != EndReg) { + Reg = getNextRegister(Reg); + Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + } + continue; } - - if (!EmittedWarning && Reg < HighRegNum) - Warning(RegInfo.second, - "register not in ascending order in register list"); - - RegMap[Reg] = true; - HighRegNum = std::max(Reg, HighRegNum); + Parser.Lex(); // Eat the comma. + RegLoc = Parser.getTok().getLoc(); + int OldReg = Reg; + Reg = tryParseRegister(); + if (Reg == -1) + return Error(RegLoc, "register expected"); + // The register must be in the same register class as the first. + if (!RC->contains(Reg)) + return Error(RegLoc, "invalid register in register list"); + // List must be monotonically increasing. + if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg)) + return Error(RegLoc, "register list not in ascending order"); + // VFP register lists must also be contiguous. + // It's OK to use the enumeration values directly here rather, as the + // VFP register classes have the enum sorted properly. + if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && + Reg != OldReg + 1) + return Error(RegLoc, "non-contiguous register range"); + Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); } + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RCurly)) + return Error(E, "'}' expected"); + Parser.Lex(); // Eat '}' token. + Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); return false; } -/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. +/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); @@ -1360,28 +2294,32 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction. +/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef IFlagsStr = Tok.getString(); + // An iflags string of "none" is interpreted to mean that none of the AIF + // bits are set. Not a terribly useful instruction, but a valid encoding. 
unsigned IFlags = 0; - for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { - unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) - .Case("a", ARM_PROC::A) - .Case("i", ARM_PROC::I) - .Case("f", ARM_PROC::F) - .Default(~0U); - - // If some specific iflag is already set, it means that some letter is - // present more than once, this is not acceptable. - if (Flag == ~0U || (IFlags & Flag)) - return MatchOperand_NoMatch; + if (IFlagsStr != "none") { + for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { + unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) + .Case("a", ARM_PROC::A) + .Case("i", ARM_PROC::I) + .Case("f", ARM_PROC::F) + .Default(~0U); + + // If some specific iflag is already set, it means that some letter is + // present more than once, this is not acceptable. + if (Flag == ~0U || (IFlags & Flag)) + return MatchOperand_NoMatch; - IFlags |= Flag; + IFlags |= Flag; + } } Parser.Lex(); // Eat identifier token. @@ -1389,18 +2327,49 @@ tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction. +/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef Mask = Tok.getString(); + if (isMClass()) { + // See ARMv6-M 10.1.1 + unsigned FlagsVal = StringSwitch<unsigned>(Mask) + .Case("apsr", 0) + .Case("iapsr", 1) + .Case("eapsr", 2) + .Case("xpsr", 3) + .Case("ipsr", 5) + .Case("epsr", 6) + .Case("iepsr", 7) + .Case("msp", 8) + .Case("psp", 9) + .Case("primask", 16) + .Case("basepri", 17) + .Case("basepri_max", 18) + .Case("faultmask", 19) + .Case("control", 20) + .Default(~0U); + + if (FlagsVal == ~0U) + return MatchOperand_NoMatch; + + if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) + // basepri, basepri_max and faultmask only valid for V7m. + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); + return MatchOperand_Success; + } + // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf" size_t Start = 0, Next = Mask.find('_'); StringRef Flags = ""; - StringRef SpecReg = Mask.slice(Start, Next); + std::string SpecReg = LowercaseString(Mask.slice(Start, Next)); if (Next != StringRef::npos) Flags = Mask.slice(Next+1, Mask.size()); @@ -1411,7 +2380,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (SpecReg == "apsr") { FlagsVal = StringSwitch<unsigned>(Flags) - .Case("nzcvq", 0x8) // same as CPSR_c + .Case("nzcvq", 0x8) // same as CPSR_f .Case("g", 0x4) // same as CPSR_s .Case("nzcvqg", 0xc) // same as CPSR_fs .Default(~0U); @@ -1420,7 +2389,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!Flags.empty()) return MatchOperand_NoMatch; else - FlagsVal = 0; // No flag + FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { if (Flags == "all") // cpsr_all is an alias for cpsr_fc @@ -1455,96 +2424,680 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand. 
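A standalone sketch of the CPS iflags mapping implemented above: each letter sets one bit (a, i, f), the string "none" yields an empty mask, and a repeated or unknown letter is rejected. The bit values below (A=4, I=2, F=1) follow the usual AIF ordering and are meant as illustrative stand-ins for the ARM_PROC constants, not quoted from the patch.

// Map a CPS iflags string ("a", "i", "f" in any order, or "none") to a
// bitmask, rejecting duplicate letters -- mirrors parseProcIFlagsOperand.
#include <cstdio>
#include <string>

static int parseIFlags(const std::string &S) {
  if (S == "none")
    return 0;                       // valid encoding: no AIF bits set
  int Flags = 0;
  for (char C : S) {
    int Bit = C == 'a' ? 4 : C == 'i' ? 2 : C == 'f' ? 1 : -1;
    if (Bit == -1 || (Flags & Bit))
      return -1;                    // unknown or repeated letter
    Flags |= Bit;
  }
  return Flags;
}

int main() {
  printf("%d %d %d\n", parseIFlags("aif"),   // 7
                       parseIFlags("none"),  // 0
                       parseIFlags("aa"));   // -1 (duplicate letter)
}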
ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); +parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, + int Low, int High) { + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), Op + " operand expected."); + return MatchOperand_ParseFail; + } + StringRef ShiftName = Tok.getString(); + std::string LowerOp = LowercaseString(Op); + std::string UpperOp = UppercaseString(Op); + if (ShiftName != LowerOp && ShiftName != UpperOp) { + Error(Parser.getTok().getLoc(), Op + " operand expected."); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat shift type token. - if (ParseMemory(Operands, ARMII::AddrMode2)) - return MatchOperand_NoMatch; + // There must be a '#' and a shift amount. + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + + const MCExpr *ShiftAmount; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ShiftAmount)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + int Val = CE->getValue(); + if (Val < Low || Val > High) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc())); return MatchOperand_Success; } -/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); +parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(Tok.getLoc(), "'be' or 'le' operand expected"); + return MatchOperand_ParseFail; + } + int Val = StringSwitch<int>(Tok.getString()) + .Case("be", 1) + .Case("le", 0) + .Default(-1); + Parser.Lex(); // Eat the token. + + if (Val == -1) { + Error(Tok.getLoc(), "'be' or 'le' operand expected"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, + getContext()), + S, Parser.getTok().getLoc())); + return MatchOperand_Success; +} + +/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT +/// instructions. Legal values are: +/// lsl #n 'n' in [0,31] +/// asr #n 'n' in [1,32] +/// n == 32 encoded as n == 0. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(S, "shift operator 'asr' or 'lsl' expected"); + return MatchOperand_ParseFail; + } + StringRef ShiftName = Tok.getString(); + bool isASR; + if (ShiftName == "lsl" || ShiftName == "LSL") + isASR = false; + else if (ShiftName == "asr" || ShiftName == "ASR") + isASR = true; + else { + Error(S, "shift operator 'asr' or 'lsl' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the operator. + + // A '#' and a shift amount. 
+ if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + + const MCExpr *ShiftAmount; + SMLoc E = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ShiftAmount)) { + Error(E, "malformed shift expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); + if (!CE) { + Error(E, "shift amount must be an immediate"); + return MatchOperand_ParseFail; + } - if (ParseMemory(Operands, ARMII::AddrMode3)) + int64_t Val = CE->getValue(); + if (isASR) { + // Shift amount must be in [1,32] + if (Val < 1 || Val > 32) { + Error(E, "'asr' shift amount must be in range [1,32]"); + return MatchOperand_ParseFail; + } + // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode. + if (isThumb() && Val == 32) { + Error(E, "'asr #32' shift amount not allowed in Thumb mode"); + return MatchOperand_ParseFail; + } + if (Val == 32) Val = 0; + } else { + // Shift amount must be in [1,32] + if (Val < 0 || Val > 31) { + Error(E, "'lsr' shift amount must be in range [0,31]"); + return MatchOperand_ParseFail; + } + } + + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E)); + + return MatchOperand_Success; +} + +/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family +/// of instructions. Legal values are: +/// ror #n 'n' in {0, 8, 16, 24} +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef ShiftName = Tok.getString(); + if (ShiftName != "ror" && ShiftName != "ROR") return MatchOperand_NoMatch; + Parser.Lex(); // Eat the operator. + + // A '#' and a rotate amount. + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + + const MCExpr *ShiftAmount; + SMLoc E = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ShiftAmount)) { + Error(E, "malformed rotate expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); + if (!CE) { + Error(E, "rotate amount must be an immediate"); + return MatchOperand_ParseFail; + } + + int64_t Val = CE->getValue(); + // Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension) + // normally, zero is represented in asm by omitting the rotate operand + // entirely. + if (Val != 8 && Val != 16 && Val != 24 && Val != 0) { + Error(E, "'ror' rotate amount must be 8, 16, or 24"); + return MatchOperand_ParseFail; + } + + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateRotImm(Val, S, E)); + + return MatchOperand_Success; +} + +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + // The bitfield descriptor is really two operands, the LSB and the width. + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. 
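The ssat/usat shift operand rules enforced above (lsl #n with n in [0,31], asr #n with n in [1,32], asr #32 stored as 0, and asr #32 rejected when assembling Thumb) can be summarised in a small standalone checker; this is a sketch of the same rules, not the parser itself.

// Validate and normalise an SSAT/USAT shift amount the way parseShifterImm
// does: lsl takes [0,31]; asr takes [1,32] with 32 stored as 0, and asr #32
// is rejected in Thumb mode.  Returns -1 on an invalid amount.
#include <cstdio>

static int encodeSatShift(bool isASR, int Val, bool isThumb) {
  if (isASR) {
    if (Val < 1 || Val > 32) return -1;
    if (isThumb && Val == 32) return -1;   // 'asr #32' not allowed in Thumb2
    return Val == 32 ? 0 : Val;            // asr #32 encoded as 0
  }
  return (Val < 0 || Val > 31) ? -1 : Val; // lsl range is [0,31]
}

int main() {
  printf("%d %d %d\n", encodeSatShift(true, 32, false),   // 0
                       encodeSatShift(true, 32, true),    // -1
                       encodeSatShift(false, 31, false)); // 31
}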
+ + const MCExpr *LSBExpr; + SMLoc E = Parser.getTok().getLoc(); + if (getParser().ParseExpression(LSBExpr)) { + Error(E, "malformed immediate expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr); + if (!CE) { + Error(E, "'lsb' operand must be an immediate"); + return MatchOperand_ParseFail; + } + + int64_t LSB = CE->getValue(); + // The LSB must be in the range [0,31] + if (LSB < 0 || LSB > 31) { + Error(E, "'lsb' operand must be in the range [0,31]"); + return MatchOperand_ParseFail; + } + E = Parser.getTok().getLoc(); + + // Expect another immediate operand. + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "too few operands"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + + const MCExpr *WidthExpr; + if (getParser().ParseExpression(WidthExpr)) { + Error(E, "malformed immediate expression"); + return MatchOperand_ParseFail; + } + CE = dyn_cast<MCConstantExpr>(WidthExpr); + if (!CE) { + Error(E, "'width' operand must be an immediate"); + return MatchOperand_ParseFail; + } + + int64_t Width = CE->getValue(); + // The LSB must be in the range [1,32-lsb] + if (Width < 1 || Width > 32 - LSB) { + Error(E, "'width' operand must be in the range [1,32-lsb]"); + return MatchOperand_ParseFail; + } + E = Parser.getTok().getLoc(); + + Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E)); return MatchOperand_Success; } -/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Check for a post-index addressing register operand. Specifically: + // postidx_reg := '+' register {, shift} + // | '-' register {, shift} + // | register {, shift} + + // This method must return MatchOperand_NoMatch without consuming any tokens + // in the case where there is no match, as other alternatives take other + // parse methods. + AsmToken Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + bool haveEaten = false; + bool isAdd = true; + int Reg = -1; + if (Tok.is(AsmToken::Plus)) { + Parser.Lex(); // Eat the '+' token. + haveEaten = true; + } else if (Tok.is(AsmToken::Minus)) { + Parser.Lex(); // Eat the '-' token. + isAdd = false; + haveEaten = true; + } + if (Parser.getTok().is(AsmToken::Identifier)) + Reg = tryParseRegister(); + if (Reg == -1) { + if (!haveEaten) + return MatchOperand_NoMatch; + Error(Parser.getTok().getLoc(), "register expected"); + return MatchOperand_ParseFail; + } + SMLoc E = Parser.getTok().getLoc(); + + ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift; + unsigned ShiftImm = 0; + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the ','. + if (parseMemRegOffsetShift(ShiftTy, ShiftImm)) + return MatchOperand_ParseFail; + } + + Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy, + ShiftImm, S, E)); + + return MatchOperand_Success; +} + +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Check for a post-index addressing register operand. 
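A standalone sketch of the bfc/bfi bitfield operand constraints parsed just above: lsb must be in [0,31] and width in [1, 32-lsb]. The msb value returned below is simply lsb + width - 1, a conventional derived field added for illustration and not present in the patch.

// Check a bitfield descriptor "#<lsb>, #<width>" the way parseBitfield does:
// lsb in [0,31], width in [1, 32 - lsb].
#include <cstdio>

static bool checkBitfield(int LSB, int Width, int &MSB) {
  if (LSB < 0 || LSB > 31) return false;
  if (Width < 1 || Width > 32 - LSB) return false;
  MSB = LSB + Width - 1;   // derived for illustration only
  return true;
}

int main() {
  int MSB;
  bool OK = checkBitfield(8, 16, MSB);        // valid: covers bits 8..23
  printf("%d %d\n", OK, MSB);                 // 1 23
  printf("%d\n", checkBitfield(8, 25, MSB));  // 0: width exceeds 32 - lsb
}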
Specifically: + // am3offset := '+' register + // | '-' register + // | register + // | # imm + // | # + imm + // | # - imm + + // This method must return MatchOperand_NoMatch without consuming any tokens + // in the case where there is no match, as other alternatives take other + // parse methods. + AsmToken Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + + // Do immediates first, as we always parse those if we have a '#'. + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat the '#'. + // Explicitly look for a '-', as we need to encode negative zero + // differently. + bool isNegative = Parser.getTok().is(AsmToken::Minus); + const MCExpr *Offset; + if (getParser().ParseExpression(Offset)) + return MatchOperand_ParseFail; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); + if (!CE) { + Error(S, "constant expression expected"); + return MatchOperand_ParseFail; + } + SMLoc E = Tok.getLoc(); + // Negative zero is encoded as the flag value INT32_MIN. + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + Val = INT32_MIN; + + Operands.push_back( + ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E)); + + return MatchOperand_Success; + } + + + bool haveEaten = false; + bool isAdd = true; + int Reg = -1; + if (Tok.is(AsmToken::Plus)) { + Parser.Lex(); // Eat the '+' token. + haveEaten = true; + } else if (Tok.is(AsmToken::Minus)) { + Parser.Lex(); // Eat the '-' token. + isAdd = false; + haveEaten = true; + } + if (Parser.getTok().is(AsmToken::Identifier)) + Reg = tryParseRegister(); + if (Reg == -1) { + if (!haveEaten) + return MatchOperand_NoMatch; + Error(Parser.getTok().getLoc(), "register expected"); + return MatchOperand_ParseFail; + } + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift, + 0, S, E)); + + return MatchOperand_Success; +} + +/// cvtT2LdrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateReg(0)); + // addr + ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtT2StrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateReg(0)); + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // addr + ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
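parseAM3Offset above, like addPostIdxImm8Operands earlier in the operand class, distinguishes #-0 from #+0 by storing INT32_MIN as a sentinel, and the operand emitter folds the sign into a flag bit above the 8-bit magnitude. A minimal sketch of that round trip, under the assumption that bit 8 is the add/subtract flag exactly as in addPostIdxImm8Operands:

// Round-trip a post-index immediate: the parser stores #-0 as INT32_MIN so
// the sign survives, and the emitter packs |imm| in the low 8 bits with an
// "is-add" flag at bit 8 (mirroring addPostIdxImm8Operands).  Sketch only.
#include <cstdint>
#include <cstdio>

static int32_t parseOffset(bool sawMinus, int32_t Val) {
  // Val already carries its sign from the expression; only #-0 needs the
  // INT32_MIN sentinel so the subtract flag isn't lost.
  return (sawMinus && Val == 0) ? INT32_MIN : Val;
}

static unsigned encodePostIdxImm8(int32_t Imm) {
  bool isAdd = Imm >= 0;            // INT32_MIN (i.e. #-0) counts as subtract
  if (Imm == INT32_MIN) Imm = 0;
  unsigned Mag = Imm < 0 ? -Imm : Imm;
  return Mag | (unsigned)isAdd << 8;
}

int main() {
  printf("%#x\n", encodePostIdxImm8(parseOffset(false, 0)));  // 0x100 (#+0)
  printf("%#x\n", encodePostIdxImm8(parseOffset(true, 0)));   // 0     (#-0)
  printf("%#x\n", encodePostIdxImm8(parseOffset(true, -4)));  // 0x4   (#-4)
}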
bool ARMAsmParser:: -CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + + +/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. 
+ Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. 
+bool ARMAsmParser:: +cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // addr + ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // addr + ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); + // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } +/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtThumbMultiple- Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. 
+bool ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The second source operand must be the same register as the destination + // operand. + if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + return false; + } + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); + ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1); + // If we have a three-operand form, use that, else the second source operand + // is just the destination operand again. + if (Operands.size() == 6) + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + else + Inst.addOperand(Inst.getOperand(0)); + ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); + + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. -/// -/// TODO Only preindexing and postindexing addressing are started, unindexed -/// with option, etc are still to do. bool ARMAsmParser:: -ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - ARMII::AddrMode AddrMode = ARMII::AddrModeNone) { +parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -1552,185 +3105,178 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Parser.Lex(); // Eat left bracket token. const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) { - Error(BaseRegTok.getLoc(), "register expected"); - return true; - } - int BaseRegNum = TryParseRegister(); - if (BaseRegNum == -1) { - Error(BaseRegTok.getLoc(), "register expected"); - return true; - } + int BaseRegNum = tryParseRegister(); + if (BaseRegNum == -1) + return Error(BaseRegTok.getLoc(), "register expected"); // The next token must either be a comma or a closing bracket. const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) - return true; + return Error(Tok.getLoc(), "malformed memory operand"); - bool Preindexed = false; - bool Postindexed = false; - bool OffsetIsReg = false; - bool Negative = false; - bool Writeback = false; - ARMOperand *WBOp = 0; - int OffsetRegNum = -1; - bool OffsetRegShifted = false; - enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl; - const MCExpr *ShiftAmount = 0; - const MCExpr *Offset = 0; - - // First look for preindexed address forms, that is after the "[Rn" we now - // have to see if the next token is a comma. - if (Tok.is(AsmToken::Comma)) { - Preindexed = true; - Parser.Lex(); // Eat comma token. - - if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, - Offset, OffsetIsReg, OffsetRegNum, E)) - return true; - const AsmToken &RBracTok = Parser.getTok(); - if (RBracTok.isNot(AsmToken::RBrac)) { - Error(RBracTok.getLoc(), "']' expected"); - return true; - } - E = RBracTok.getLoc(); + if (Tok.is(AsmToken::RBrac)) { + E = Tok.getLoc(); Parser.Lex(); // Eat right bracket token. - const AsmToken &ExclaimTok = Parser.getTok(); - if (ExclaimTok.is(AsmToken::Exclaim)) { - // None of addrmode3 instruction uses "!" 
- if (AddrMode == ARMII::AddrMode3) - return true; + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, + 0, 0, false, S, E)); - WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), - ExclaimTok.getLoc()); - Writeback = true; - Parser.Lex(); // Eat exclaim token - } else { // In addressing mode 2, pre-indexed mode always end with "!" - if (AddrMode == ARMII::AddrMode2) - Preindexed = false; + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. It's rather odd, but syntactically valid. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } - } else { - // The "[Rn" we have so far was not followed by a comma. - // If there's anything other than the right brace, this is a post indexing - // addressing form. - E = Tok.getLoc(); - Parser.Lex(); // Eat right bracket token. + return false; + } - const AsmToken &NextTok = Parser.getTok(); + assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!"); + Parser.Lex(); // Eat the comma. - if (NextTok.isNot(AsmToken::EndOfStatement)) { - Postindexed = true; - Writeback = true; + // If we have a ':', it's an alignment specifier. + if (Parser.getTok().is(AsmToken::Colon)) { + Parser.Lex(); // Eat the ':'. + E = Parser.getTok().getLoc(); - if (NextTok.isNot(AsmToken::Comma)) { - Error(NextTok.getLoc(), "',' expected"); - return true; - } - - Parser.Lex(); // Eat comma token. + const MCExpr *Expr; + if (getParser().ParseExpression(Expr)) + return true; - if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, - ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, - E)) - return true; + // The expression has to be a constant. Memory references with relocations + // don't come through here, as they use the <label> forms of the relevant + // instructions. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + if (!CE) + return Error (E, "constant expression expected"); + + unsigned Align = 0; + switch (CE->getValue()) { + default: + return Error(E, "alignment specifier must be 64, 128, or 256 bits"); + case 64: Align = 8; break; + case 128: Align = 16; break; + case 256: Align = 32; break; } - } - // Force Offset to exist if used. - if (!OffsetIsReg) { - if (!Offset) - Offset = MCConstantExpr::Create(0, getContext()); - } else { - if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) { - Error(E, "shift amount not supported"); - return true; + // Now we should have the closing ']' + E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); + Parser.Lex(); // Eat right bracket token. + + // Don't worry about range checking the value here. That's handled by + // the is*() predicates. + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, + ARM_AM::no_shift, 0, Align, + false, S, E)); + + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } + + return false; } - Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg, - Offset, OffsetRegNum, OffsetRegShifted, - ShiftType, ShiftAmount, Preindexed, - Postindexed, Negative, Writeback, S, E)); - if (WBOp) - Operands.push_back(WBOp); + // If we have a '#', it's an immediate offset, else assume it's a register + // offset. 
+ if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat the '#'. + E = Parser.getTok().getLoc(); - return false; -} + bool isNegative = getParser().getTok().is(AsmToken::Minus); + const MCExpr *Offset; + if (getParser().ParseExpression(Offset)) + return true; + + // The expression has to be a constant. Memory references with relocations + // don't come through here, as they use the <label> forms of the relevant + // instructions. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); + if (!CE) + return Error (E, "constant expression expected"); + + // If the constant was #-0, represent it as INT32_MIN. + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + CE = MCConstantExpr::Create(INT32_MIN, getContext()); + + // Now we should have the closing ']' + E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); + Parser.Lex(); // Eat right bracket token. -/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," -/// we will parse the following (were +/- means that a plus or minus is -/// optional): -/// +/-Rm -/// +/-Rm, shift -/// #offset -/// we return false on success or an error otherwise. -bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, - bool &OffsetRegShifted, - enum ARM_AM::ShiftOpc &ShiftType, - const MCExpr *&ShiftAmount, - const MCExpr *&Offset, - bool &OffsetIsReg, - int &OffsetRegNum, - SMLoc &E) { - Negative = false; - OffsetRegShifted = false; - OffsetIsReg = false; - OffsetRegNum = -1; - const AsmToken &NextTok = Parser.getTok(); - E = NextTok.getLoc(); - if (NextTok.is(AsmToken::Plus)) - Parser.Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - Parser.Lex(); // Eat minus token - } - // See if there is a register following the "[Rn," or "[Rn]," we have so far. - const AsmToken &OffsetRegTok = Parser.getTok(); - if (OffsetRegTok.is(AsmToken::Identifier)) { - SMLoc CurLoc = OffsetRegTok.getLoc(); - OffsetRegNum = TryParseRegister(); - if (OffsetRegNum != -1) { - OffsetIsReg = true; - E = CurLoc; + // Don't worry about range checking the value here. That's handled by + // the is*() predicates. + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0, + ARM_AM::no_shift, 0, 0, + false, S, E)); + + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } - } - // If we parsed a register as the offset then there can be a shift after that. - if (OffsetRegNum != -1) { - // Look for a comma then a shift - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Comma)) { - Parser.Lex(); // Eat comma token. + return false; + } - const AsmToken &Tok = Parser.getTok(); - if (ParseShift(ShiftType, ShiftAmount, E)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; - } + // The register offset is optionally preceded by a '+' or '-' + bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + Parser.Lex(); // Eat the '-'. + } else if (Parser.getTok().is(AsmToken::Plus)) { + // Nothing to do. + Parser.Lex(); // Eat the '+'. 
} - else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" - // Look for #offset following the "[Rn," or "[Rn]," - const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - Parser.Lex(); // Eat hash token. + E = Parser.getTok().getLoc(); + int OffsetRegNum = tryParseRegister(); + if (OffsetRegNum == -1) + return Error(E, "register expected"); + + // If there's a shift operator, handle it. + ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift; + unsigned ShiftImm = 0; + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the ','. + if (parseMemRegOffsetShift(ShiftType, ShiftImm)) + return true; + } - if (getParser().ParseExpression(Offset)) - return true; - E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + // Now we should have the closing ']' + E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, + ShiftType, ShiftImm, 0, isNegative, + S, E)); + + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } + return false; } -/// ParseShift as one of these two: +/// parseMemRegOffsetShift - one of these two: /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx -/// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St, - const MCExpr *&ShiftAmount, SMLoc &E) { +/// return true if it parses a shift otherwise it returns false. +bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, + unsigned &Amount) { + SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; @@ -1746,28 +3292,86 @@ bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St, else if (ShiftName == "rrx" || ShiftName == "RRX") St = ARM_AM::rrx; else - return true; + return Error(Loc, "illegal shift operator"); Parser.Lex(); // Eat shift type token. - // Rrx stands alone. - if (St == ARM_AM::rrx) - return false; - - // Otherwise, there must be a '#' and a shift amount. - const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - Parser.Lex(); // Eat hash token. + // rrx stands alone. + Amount = 0; + if (St != ARM_AM::rrx) { + Loc = Parser.getTok().getLoc(); + // A '#' and a shift amount. + const AsmToken &HashTok = Parser.getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + Parser.Lex(); // Eat hash token. - if (getParser().ParseExpression(ShiftAmount)) - return true; + const MCExpr *Expr; + if (getParser().ParseExpression(Expr)) + return true; + // Range check the immediate. + // lsl, ror: 0 <= imm <= 31 + // lsr, asr: 0 <= imm <= 32 + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + if (!CE) + return Error(Loc, "shift amount must be an immediate"); + int64_t Imm = CE->getValue(); + if (Imm < 0 || + ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) || + ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32)) + return Error(Loc, "immediate shift value out of range"); + Amount = Imm; + } return false; } +/// parseFPImm - A floating point immediate expression operand. 
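A note on parseFPImm, which follows: floating point immediate operands are only legal when they fit the 8-bit VFP immediate encoding, and that is exactly what the ARM_AM::getFP64Imm call below tests, returning -1 when the value does not fit. A standalone way to ask the same question, assuming only that helper (isEncodableFPImm is a made-up name, not part of the patch):

#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// True if V can be materialized as a vmov.f64-style immediate, i.e. its
// IEEE double bit pattern fits the 8-bit VFP immediate encoding.
static bool isEncodableFPImm(double V) {
  APInt Bits = APFloat(V).bitcastToAPInt();  // 64-bit IEEE bit pattern
  return ARM_AM::getFP64Imm(Bits) != -1;     // -1 means "not encodable"
}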
+ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + if (Parser.getTok().isNot(AsmToken::Hash)) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the '#'. + + // Handle negation, as that still comes through as a separate token. + bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + Parser.Lex(); + } + const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Real)) { + APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); + // If we had a '-' in front, toggle the sign bit. + IntVal ^= (uint64_t)isNegative << 63; + int Val = ARM_AM::getFP64Imm(APInt(64, IntVal)); + Parser.Lex(); // Eat the token. + if (Val == -1) { + TokError("floating point value out of range"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::Integer)) { + int64_t Val = Tok.getIntVal(); + Parser.Lex(); // Eat the token. + if (Val > 255 || Val < 0) { + TokError("encoded floating point value out of range"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; + } + + TokError("invalid floating point immediate"); + return MatchOperand_ParseFail; +} /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. -bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, +bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Mnemonic) { SMLoc S, E; @@ -1787,13 +3391,20 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!TryParseRegisterWithWriteBack(Operands)) + // If this is VMRS, check for the apsr_nzcv operand. + if (!tryParseRegisterWithWriteBack(Operands)) return false; - int Res = TryParseShiftRegister(Operands); + int Res = tryParseShiftRegister(Operands); if (Res == 0) // success return false; else if (Res == -1) // irrecoverable error return true; + if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S)); + return false; + } // Fall though for the Identifier case that is not a register or a // special name. @@ -1811,26 +3422,36 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; } case AsmToken::LBrac: - return ParseMemory(Operands); + return parseMemory(Operands); case AsmToken::LCurly: - return ParseRegisterList(Operands); - case AsmToken::Hash: + return parseRegisterList(Operands); + case AsmToken::Hash: { // #42 -> immediate. 
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate S = Parser.getTok().getLoc(); Parser.Lex(); + bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *ImmVal; if (getParser().ParseExpression(ImmVal)) return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (!CE) { + Error(S, "constant expression expected"); + return MatchOperand_ParseFail; + } + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); return false; + } case AsmToken::Colon: { // ":lower16:" and ":upper16:" expression prefixes // FIXME: Check it's an expression prefix, // e.g. (FOO - :lower16:BAR) isn't legal. ARMMCExpr::VariantKind RefKind; - if (ParsePrefix(RefKind)) + if (parsePrefix(RefKind)) return true; const MCExpr *SubExprVal; @@ -1846,9 +3467,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } } -// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. +// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:. -bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) { +bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { RefKind = ARMMCExpr::VK_ARM_None; // :lower16: and :upper16: modifiers @@ -1879,55 +3500,16 @@ bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) { return false; } -const MCExpr * -ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind Variant) { - // Recurse over the given expression, rebuilding it to apply the given variant - // to the leftmost symbol. - if (Variant == MCSymbolRefExpr::VK_None) - return E; - - switch (E->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle target expr yet"); - case MCExpr::Constant: - llvm_unreachable("Can't handle lower16/upper16 of constant yet"); - - case MCExpr::SymbolRef: { - const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E); - - if (SRE->getKind() != MCSymbolRefExpr::VK_None) - return 0; - - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext()); - } - - case MCExpr::Unary: - llvm_unreachable("Can't handle unary expressions yet"); - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); - const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant); - const MCExpr *RHS = BE->getRHS(); - if (!LHS) - return 0; - - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext()); - } - } - - assert(0 && "Invalid expression kind!"); - return 0; -} - /// \brief Given a mnemonic, split out possible predication code and carry /// setting letters to form a canonical mnemonic and flags. // // FIXME: Would be nice to autogen this. -static StringRef SplitMnemonic(StringRef Mnemonic, - unsigned &PredicationCode, - bool &CarrySetting, - unsigned &ProcessorIMod) { +// FIXME: This is a bit of a maze of special cases. +StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, + unsigned &PredicationCode, + bool &CarrySetting, + unsigned &ProcessorIMod, + StringRef &ITMask) { PredicationCode = ARMCC::AL; CarrySetting = false; ProcessorIMod = 0; @@ -1935,23 +3517,22 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // Ignore some mnemonics we know aren't predicated forms. // // FIXME: Would be nice to autogen this. 
- if (Mnemonic == "teq" || Mnemonic == "vceq" || - Mnemonic == "movs" || - Mnemonic == "svc" || - (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || - Mnemonic == "vmls" || Mnemonic == "vnmls") || - Mnemonic == "vacge" || Mnemonic == "vcge" || - Mnemonic == "vclt" || - Mnemonic == "vacgt" || Mnemonic == "vcgt" || - Mnemonic == "vcle" || - (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || - Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || - Mnemonic == "vqdmlal" || Mnemonic == "bics")) + if ((Mnemonic == "movs" && isThumb()) || + Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" || + Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || + Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || + Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || + Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || + Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || + Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't // predicated but do have a carry-set and so weren't caught above. - if (Mnemonic != "adcs") { + if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" && + Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" && + Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" && + Mnemonic != "sbcs" && Mnemonic != "rscs") { unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) .Case("eq", ARMCC::EQ) .Case("ne", ARMCC::NE) @@ -1980,11 +3561,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // Next, determine if we have a carry setting bit. We explicitly ignore all // the instructions we know end in 's'. if (Mnemonic.endswith("s") && - !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" || - Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" || - Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || - Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || - Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) { + !(Mnemonic == "cps" || Mnemonic == "mls" || + Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || + Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || + Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || + (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; } @@ -2004,6 +3586,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic, } } + // The "it" instruction has the condition mask on the end of the mnemonic. + if (Mnemonic.startswith("it")) { + ITMask = Mnemonic.slice(2, Mnemonic.size()); + Mnemonic = Mnemonic.slice(0, 2); + } + return Mnemonic; } @@ -2012,37 +3600,154 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // // FIXME: It would be nice to autogen this. 
void ARMAsmParser:: -GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, +getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || - Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" || + Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || - Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" || + Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || - Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" || - Mnemonic == "eor" || Mnemonic == "smlal" || - (Mnemonic == "mov" && !isThumbOne())) { + Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || + Mnemonic == "mla" || Mnemonic == "smlal" || + Mnemonic == "umlal" || Mnemonic == "umull"))) { CanAcceptCarrySet = true; - } else { + } else CanAcceptCarrySet = false; - } if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || - Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" || - Mnemonic == "clrex" || Mnemonic.startswith("cps")) { + Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" || + (Mnemonic == "clrex" && !isThumb()) || + (Mnemonic == "nop" && isThumbOne()) || + ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || + Mnemonic == "ldc2" || Mnemonic == "ldc2l" || + Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) || + ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) && + !isThumb()) || + Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) { CanAcceptPredicationCode = false; - } else { + } else CanAcceptPredicationCode = true; - } - if (isThumb()) + if (isThumb()) { if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") CanAcceptPredicationCode = false; + } +} + +bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME: This is all horribly hacky. We really need a better way to deal + // with optional operands like this in the matcher table. + + // The 'mov' mnemonic is special. One variant has a cc_out operand, while + // another does not. Specifically, the MOVW instruction does not. So we + // special case it here and remove the defaulted (non-setting) cc_out + // operand if that's the instruction we're trying to match. + // + // We do this as post-processing of the explicit operands rather than just + // conditionally adding the cc_out in the first place because we need + // to check the type of the parsed immediate operand. + if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && + !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() && + static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + return true; + + // Register-register 'add' for thumb does not have a cc_out operand + // when there are only two register operands. 
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + return true; + // Register-register 'add' for thumb does not have a cc_out operand + // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do + // have to check the immediate range here since Thumb2 has a variant + // that can handle a different range and has a cc_out operand. + if (((isThumb() && Mnemonic == "add") || + (isThumbTwo() && Mnemonic == "sub")) && + Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + (static_cast<ARMOperand*>(Operands[5])->isReg() || + static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) + return true; + // For Thumb2, add/sub immediate does not have a cc_out operand for the + // imm0_4095 variant. That's the least-preferred variant when + // selecting via the generic "add" mnemonic, so to know that we + // should remove the cc_out operand, we have to explicitly check that + // it's not one of the other variants. Ugh. + if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && + Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + // Nest conditions rather than one big 'if' statement for readability. + // + // If either register is a high reg, it's either one of the SP + // variants (handled above) or a 32-bit encoding, so we just + // check against T3. + if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && + static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) + return false; + // If both registers are low, we're in an IT block, and the immediate is + // in range, we should use encoding T1 instead, which has a cc_out. + if (inITBlock() && + isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) && + static_cast<ARMOperand*>(Operands[5])->isImm0_7()) + return false; + + // Otherwise, we use encoding T4, which does not have a cc_out + // operand. + return true; + } + + // The thumb2 multiply instruction doesn't have a CCOut register, so + // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to + // use the 16-bit encoding or not. + if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[5])->isReg() && + // If the registers aren't low regs, the destination reg isn't the + // same as one of the source regs, or the cc_out operand is zero + // outside of an IT block, we have to use the 32-bit encoding, so + // remove the cc_out operand. 
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + !inITBlock() || + (static_cast<ARMOperand*>(Operands[3])->getReg() != + static_cast<ARMOperand*>(Operands[5])->getReg() && + static_cast<ARMOperand*>(Operands[3])->getReg() != + static_cast<ARMOperand*>(Operands[4])->getReg()))) + return true; + + + + // Register-register 'add/sub' for thumb does not have a cc_out operand + // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also + // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't + // right, this will result in better diagnostics (which operand is off) + // anyway. + if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") && + (Operands.size() == 5 || Operands.size() == 6) && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + return true; + + return false; } /// Parse an arm instruction mnemonic followed by its operands. @@ -2050,16 +3755,51 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); + StringRef Mnemonic = Name.slice(Start, Next); // Split out the predication code and carry setting flag from the mnemonic. unsigned PredicationCode; unsigned ProcessorIMod; bool CarrySetting; - Head = SplitMnemonic(Head, PredicationCode, CarrySetting, - ProcessorIMod); + StringRef ITMask; + Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting, + ProcessorIMod, ITMask); + + // In Thumb1, only the branch (B) instruction can be predicated. + if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") { + Parser.EatToEndOfStatement(); + return Error(NameLoc, "conditional execution not supported in Thumb1"); + } + + Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc)); - Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); + // Handle the IT instruction ITMask. Convert it to a bitmask. This + // is the mask as it will be for the IT encoding if the conditional + // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case + // where the conditional bit0 is zero, the instruction post-processing + // will adjust the mask accordingly. + if (Mnemonic == "it") { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2); + if (ITMask.size() > 3) { + Parser.EatToEndOfStatement(); + return Error(Loc, "too many conditions on IT instruction"); + } + unsigned Mask = 8; + for (unsigned i = ITMask.size(); i != 0; --i) { + char pos = ITMask[i - 1]; + if (pos != 't' && pos != 'e') { + Parser.EatToEndOfStatement(); + return Error(Loc, "illegal IT block condition mask '" + ITMask + "'"); + } + Mask >>= 1; + if (ITMask[i - 1] == 't') + Mask |= 8; + } + Operands.push_back(ARMOperand::CreateITMask(Mask, Loc)); + } + + // FIXME: This is all a pretty gross hack. We should automatically handle + // optional operands like this via tblgen. // Next, add the CCOut and ConditionCode operands, if needed. // @@ -2069,34 +3809,36 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, // the matcher deal with finding the right instruction or generating an // appropriate error. 
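The IT mask loop above encodes the condition mask with 't' meaning "same condition as firstcond" and a trailing set bit marking the end of the block; the later t2IT post-processing flips bits when firstcond has a zero low bit. Traced in isolation (buildITMask is a made-up wrapper around the same loop, with the error checking dropped):

#include "llvm/ADT/StringRef.h"

// Same algorithm as the parser loop above: start at 0b1000, then for each
// condition letter (last one first) shift right and set bit 3 for 't'.
static unsigned buildITMask(llvm::StringRef ITMask) {
  unsigned Mask = 8;                    // plain "it": mask stays 0b1000
  for (unsigned i = ITMask.size(); i != 0; --i) {
    Mask >>= 1;
    if (ITMask[i - 1] == 't')
      Mask |= 8;
  }
  return Mask;  // "t" -> 0b1100, "te" -> 0b1010, "tee" -> 0b1001
}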
bool CanAcceptCarrySet, CanAcceptPredicationCode; - GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode); + getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode); // If we had a carry-set on an instruction that can't do that, issue an // error. if (!CanAcceptCarrySet && CarrySetting) { Parser.EatToEndOfStatement(); - return Error(NameLoc, "instruction '" + Head + + return Error(NameLoc, "instruction '" + Mnemonic + "' can not set flags, but 's' suffix specified"); } + // If we had a predication code on an instruction that can't do that, issue an + // error. + if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { + Parser.EatToEndOfStatement(); + return Error(NameLoc, "instruction '" + Mnemonic + + "' is not predicable, but condition code specified"); + } // Add the carry setting operand, if necessary. - // - // FIXME: It would be awesome if we could somehow invent a location such that - // match errors on this operand would print a nice diagnostic about how the - // 's' character in the mnemonic resulted in a CCOut operand. - if (CanAcceptCarrySet) + if (CanAcceptCarrySet) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size()); Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0, - NameLoc)); + Loc)); + } // Add the predication code operand, if necessary. if (CanAcceptPredicationCode) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() + + CarrySetting); Operands.push_back(ARMOperand::CreateCondCode( - ARMCC::CondCodes(PredicationCode), NameLoc)); - } else { - // This mnemonic can't ever accept a predication code, but the user wrote - // one (or misspelled another mnemonic). - - // FIXME: Issue a nice error. + ARMCC::CondCodes(PredicationCode), Loc)); } // Add the processor imod operand, if necessary. @@ -2104,11 +3846,6 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(ProcessorIMod, getContext()), NameLoc, NameLoc)); - } else { - // This mnemonic can't ever accept a imod, but the user wrote - // one (or misspelled another mnemonic). - - // FIXME: Issue a nice error. } // Add the remaining tokens in the mnemonic. @@ -2117,13 +3854,19 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); - Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc)); + // For now, we're only parsing Thumb1 (for the most part), so + // just ignore ".n" qualifiers. We'll use them to restrict + // matching when we do Thumb2. + if (ExtraToken != ".n") { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); + Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); + } } // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Head)) { + if (parseOperand(Operands, Mnemonic)) { Parser.EatToEndOfStatement(); return true; } @@ -2132,7 +3875,7 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Parser.Lex(); // Eat the comma. // Parse and remember the operand. 
- if (ParseOperand(Operands, Head)) { + if (parseOperand(Operands, Mnemonic)) { Parser.EatToEndOfStatement(); return true; } @@ -2140,75 +3883,548 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, } if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); Parser.EatToEndOfStatement(); - return TokError("unexpected token in argument list"); + return Error(Loc, "unexpected token in argument list"); } Parser.Lex(); // Consume the EndOfStatement + + // Some instructions, mostly Thumb, have forms for the same mnemonic that + // do and don't have a cc_out optional-def operand. With some spot-checks + // of the operand list, we can figure out which variant we're trying to + // parse and adjust accordingly before actually matching. We shouldn't ever + // try to remove a cc_out operand that was explicitly set on the the + // mnemonic, of course (CarrySetting == true). Reason number #317 the + // table driven matcher doesn't fit well with the ARM instruction set. + if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op; + } + + // ARM mode 'blx' need special handling, as the register operand version + // is predicable, but the label operand version is not. So, we can't rely + // on the Mnemonic based checking to correctly figure out when to put + // a k_CondCode operand in the list. If we're trying to match the label + // version, remove the k_CondCode operand here. + if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 && + static_cast<ARMOperand*>(Operands[2])->isImm()) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op; + } + + // The vector-compare-to-zero instructions have a literal token "#0" at + // the end that comes to here as an immediate operand. Convert it to a + // token to play nicely with the matcher. + if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" || + Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (CE && CE->getValue() == 0) { + Operands.erase(Operands.begin() + 5); + Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); + delete Op; + } + } + // VCMP{E} does the same thing, but with a different operand count. + if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 && + static_cast<ARMOperand*>(Operands[4])->isImm()) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (CE && CE->getValue() == 0) { + Operands.erase(Operands.begin() + 4); + Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); + delete Op; + } + } + // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the + // end. Convert it to a token here. + if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (CE && CE->getValue() == 0) { + Operands.erase(Operands.begin() + 5); + Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); + delete Op; + } + } + + return false; +} + +// Validate context-sensitive operand constraints. 
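The context-sensitive checks that follow lean on a small IT-block state machine kept by the parser: ITState records the IT instruction's condition, its mask and the current slot, while inITBlock() and forwardITPosition() (used later in MatchAndEmitInstruction) query and advance it. A compressed model of how the expected condition for the current slot is derived; the struct and function here are illustrative stand-ins, and in the real code the opposite condition comes from ARMCC::getOppositeCondition:

// Minimal model of the parser's IT-block bookkeeping.
struct ITBlockState {
  unsigned Cond;        // firstcond taken from the IT instruction
  unsigned Mask;        // 4-bit mask; the lowest set bit ends the block
  unsigned CurPosition; // which slot of the block is being validated
  bool FirstCond;       // true until the first instruction is checked
};

// The first slot must use Cond itself; later slots use Cond or its
// opposite depending on the matching mask bit, as validateInstruction
// below computes it.
static unsigned expectedCondition(const ITBlockState &IT, unsigned OppCond) {
  if (IT.FirstCond)
    return IT.Cond;
  unsigned bit = (IT.Mask >> (5 - IT.CurPosition)) & 1;
  return bit ? IT.Cond : OppCond;
}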
+
+// return 'true' if register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
+                                 unsigned HiReg, bool &containsReg) {
+  containsReg = false;
+  for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+    unsigned OpReg = Inst.getOperand(i).getReg();
+    if (OpReg == Reg)
+      containsReg = true;
+    // Anything other than a low register isn't legal here.
+    if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+      return true;
+  }
+  return false;
+}
+
+// Check if the specified register is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+  for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+    unsigned OpReg = Inst.getOperand(i).getReg();
+    if (OpReg == Reg)
+      return true;
+  }
+  return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+static MCInstrDesc &getInstDesc(unsigned Opcode) {
+  return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+                    const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+  SMLoc Loc = Operands[0]->getStartLoc();
+  // Check the IT block state first.
+  // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
+  // being allowed in IT blocks, but not being predicable. It just always
+  // executes.
+  if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+    unsigned bit = 1;
+    if (ITState.FirstCond)
+      ITState.FirstCond = false;
+    else
+      bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+    // The instruction must be predicable.
+    if (!MCID.isPredicable())
+      return Error(Loc, "instructions in IT block must be predicable");
+    unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+    unsigned ITCond = bit ? ITState.Cond :
+      ARMCC::getOppositeCondition(ITState.Cond);
+    if (Cond != ITCond) {
+      // Find the condition code Operand to get its SMLoc information.
+      SMLoc CondLoc;
+      for (unsigned i = 1; i < Operands.size(); ++i)
+        if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+          CondLoc = Operands[i]->getStartLoc();
+      return Error(CondLoc, "incorrect condition in IT block; got '" +
+                   StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+                   "', but expected '" +
+                   ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+    }
+    // Check for non-'al' condition codes outside of the IT block.
+  } else if (isThumbTwo() && MCID.isPredicable() &&
+             Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+             ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+             Inst.getOpcode() != ARM::t2B)
+    return Error(Loc, "predicated instructions must be in IT block");
+
+  switch (Inst.getOpcode()) {
+  case ARM::LDRD:
+  case ARM::LDRD_PRE:
+  case ARM::LDRD_POST:
+  case ARM::LDREXD: {
+    // Rt2 must be Rt + 1.
+    unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+    unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+    if (Rt2 != Rt + 1)
+      return Error(Operands[3]->getStartLoc(),
+                   "destination operands must be sequential");
+    return false;
+  }
+  case ARM::STRD: {
+    // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); + unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + if (Rt2 != Rt + 1) + return Error(Operands[3]->getStartLoc(), + "source operands must be sequential"); + return false; + } + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::STREXD: { + // Rt2 must be Rt + 1. + unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg()); + if (Rt2 != Rt + 1) + return Error(Operands[3]->getStartLoc(), + "source operands must be sequential"); + return false; + } + case ARM::SBFX: + case ARM::UBFX: { + // width must be in range [1, 32-lsb] + unsigned lsb = Inst.getOperand(2).getImm(); + unsigned widthm1 = Inst.getOperand(3).getImm(); + if (widthm1 >= 32 - lsb) + return Error(Operands[5]->getStartLoc(), + "bitfield width must be in range [1,32-lsb]"); + return false; + } + case ARM::tLDMIA: { + // If we're parsing Thumb2, the .w variant is available and handles + // most cases that are normally illegal for a Thumb1 LDM + // instruction. We'll make the transformation in processInstruction() + // if necessary. + // + // Thumb LDM instructions are writeback iff the base register is not + // in the register list. + unsigned Rn = Inst.getOperand(0).getReg(); + bool hasWritebackToken = + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo()) + return Error(Operands[3 + hasWritebackToken]->getStartLoc(), + "registers must be in range r0-r7"); + // If we should have writeback, then there should be a '!' token. + if (!listContainsBase && !hasWritebackToken && !isThumbTwo()) + return Error(Operands[2]->getStartLoc(), + "writeback operator '!' expected"); + // If we should not have writeback, there must not be a '!'. This is + // true even for the 32-bit wide encodings. + if (listContainsBase && hasWritebackToken) + return Error(Operands[3]->getStartLoc(), + "writeback operator '!' not allowed when base register " + "in register list"); + + break; + } + case ARM::t2LDMIA_UPD: { + if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) + return Error(Operands[4]->getStartLoc(), + "writeback operator '!' not allowed when base register " + "in register list"); + break; + } + case ARM::tPOP: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase)) + return Error(Operands[2]->getStartLoc(), + "registers must be in range r0-r7 or pc"); + break; + } + case ARM::tPUSH: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase)) + return Error(Operands[2]->getStartLoc(), + "registers must be in range r0-r7 or lr"); + break; + } + case ARM::tSTMIA_UPD: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo()) + return Error(Operands[4]->getStartLoc(), + "registers must be in range r0-r7"); + break; + } + } + return false; } +void ARMAsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case ARM::LDMIA_UPD: + // If this is a load of a single register via a 'pop', then we should use + // a post-indexed LDR instruction instead, per the ARM ARM. 
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" && + Inst.getNumOperands() == 5) { + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDR_POST_IMM); + TmpInst.addOperand(Inst.getOperand(4)); // Rt + TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset + TmpInst.addOperand(MCOperand::CreateImm(4)); + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + case ARM::STMDB_UPD: + // If this is a store of a single register via a 'push', then we should use + // a pre-indexed STR instruction instead, per the ARM ARM. + if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" && + Inst.getNumOperands() == 5) { + MCInst TmpInst; + TmpInst.setOpcode(ARM::STR_PRE_IMM); + TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(4)); // Rt + TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12 + TmpInst.addOperand(MCOperand::CreateImm(-4)); + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + case ARM::tADDi8: + // If the immediate is in the range 0-7, we want tADDi3 iff Rd was + // explicitly specified. From the ARM ARM: "Encoding T1 is preferred + // to encoding T2 if <Rd> is specified and encoding T2 is preferred + // to encoding T1 if <Rd> is omitted." + if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) + Inst.setOpcode(ARM::tADDi3); + break; + case ARM::tSUBi8: + // If the immediate is in the range 0-7, we want tADDi3 iff Rd was + // explicitly specified. From the ARM ARM: "Encoding T1 is preferred + // to encoding T2 if <Rd> is specified and encoding T2 is preferred + // to encoding T1 if <Rd> is omitted." + if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) + Inst.setOpcode(ARM::tSUBi3); + break; + case ARM::tB: + // A Thumb conditional branch outside of an IT block is a tBcc. + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + Inst.setOpcode(ARM::tBcc); + break; + case ARM::t2B: + // A Thumb2 conditional branch outside of an IT block is a t2Bcc. + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + Inst.setOpcode(ARM::t2Bcc); + break; + case ARM::t2Bcc: + // If the conditional is AL or we're in an IT block, we really want t2B. + if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) + Inst.setOpcode(ARM::t2B); + break; + case ARM::tBcc: + // If the conditional is AL, we really want tB. + if (Inst.getOperand(1).getImm() == ARMCC::AL) + Inst.setOpcode(ARM::tB); + break; + case ARM::tLDMIA: { + // If the register list contains any high registers, or if the writeback + // doesn't match what tLDMIA can do, we need to use the 32-bit encoding + // instead if we're in Thumb2. Otherwise, this should have generated + // an error in validateInstruction(). + unsigned Rn = Inst.getOperand(0).getReg(); + bool hasWritebackToken = + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) || + (!listContainsBase && !hasWritebackToken) || + (listContainsBase && hasWritebackToken)) { + // 16-bit encoding isn't sufficient. Switch to the 32-bit version. + assert (isThumbTwo()); + Inst.setOpcode(hasWritebackToken ? 
ARM::t2LDMIA_UPD : ARM::t2LDMIA); + // If we're switching to the updating version, we need to insert + // the writeback tied operand. + if (hasWritebackToken) + Inst.insert(Inst.begin(), + MCOperand::CreateReg(Inst.getOperand(0).getReg())); + } + break; + } + case ARM::tSTMIA_UPD: { + // If the register list contains any high registers, we need to use + // the 32-bit encoding instead if we're in Thumb2. Otherwise, this + // should have generated an error in validateInstruction(). + unsigned Rn = Inst.getOperand(0).getReg(); + bool listContainsBase; + if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) { + // 16-bit encoding isn't sufficient. Switch to the 32-bit version. + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2STMIA_UPD); + } + break; + } + case ARM::t2MOVi: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(1).getImm() <= 255 && + ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && + Inst.getOperand(4).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(4).getReg() == 0)) && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + // The operands aren't in the same order for tMOVi8... + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + } + case ARM::t2MOVr: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + Inst.getOperand(2).getImm() == ARMCC::AL && + Inst.getOperand(4).getReg() == ARM::CPSR && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + // The operands aren't the same for tMOV[S]r... (no cc_out) + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + } + case ARM::t2SXTH: + case ARM::t2SXTB: + case ARM::t2UXTH: + case ARM::t2UXTB: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + Inst.getOperand(2).getImm() == 0 && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("Illegal opcode!"); + case ARM::t2SXTH: NewOpc = ARM::tSXTH; break; + case ARM::t2SXTB: NewOpc = ARM::tSXTB; break; + case ARM::t2UXTH: NewOpc = ARM::tUXTH; break; + case ARM::t2UXTB: NewOpc = ARM::tUXTB; break; + } + // The operands aren't the same for thumb1 (no rotate operand). 
+ MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + } + break; + } + case ARM::t2IT: { + // The mask bits for all but the first condition are represented as + // the low bit of the condition code value implies 't'. We currently + // always have 1 implies 't', so XOR toggle the bits if the low bit + // of the condition code is zero. The encoding also expects the low + // bit of the condition to be encoded as bit 4 of the mask operand, + // so mask that in if needed + MCOperand &MO = Inst.getOperand(1); + unsigned Mask = MO.getImm(); + unsigned OrigMask = Mask; + unsigned TZ = CountTrailingZeros_32(Mask); + if ((Inst.getOperand(0).getImm() & 1) == 0) { + assert(Mask && TZ <= 3 && "illegal IT mask value!"); + for (unsigned i = 3; i != TZ; --i) + Mask ^= 1 << i; + } else + Mask |= 0x10; + MO.setImm(Mask); + + // Set up the IT block state according to the IT instruction we just + // matched. + assert(!inITBlock() && "nested IT blocks?!"); + ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm()); + ITState.Mask = OrigMask; // Use the original mask, not the updated one. + ITState.CurPosition = 0; + ITState.FirstCond = true; + break; + } + } +} + +unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + // 16-bit thumb arithmetic instructions either require or preclude the 'S' + // suffix depending on whether they're in an IT block or not. + unsigned Opc = Inst.getOpcode(); + MCInstrDesc &MCID = getInstDesc(Opc); + if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { + assert(MCID.hasOptionalDef() && + "optionally flag setting instruction missing optional def operand"); + assert(MCID.NumOperands == Inst.getNumOperands() && + "operand count mismatch!"); + // Find the optional-def operand (cc_out). + unsigned OpNo; + for (OpNo = 0; + !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands; + ++OpNo) + ; + // If we're parsing Thumb1, reject it completely. + if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR) + return Match_MnemonicFail; + // If we're parsing Thumb2, which form is legal depends on whether we're + // in an IT block. + if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR && + !inITBlock()) + return Match_RequiresITBlock; + if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR && + inITBlock()) + return Match_RequiresNotITBlock; + } + // Some high-register supporting Thumb1 encodings only allow both registers + // to be from r0-r7 when in Thumb2. + else if (Opc == ARM::tADDhirr && isThumbOne() && + isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) + return Match_RequiresThumb2; + // Others only require ARMv6 or later. + else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() && + isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg())) + return Match_RequiresV6; + return Match_Success; +} + bool ARMAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; unsigned ErrorInfo; - MatchResultTy MatchResult, MatchResult2; + unsigned MatchResult; MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); - if (MatchResult != Match_Success) { - // If we get a Match_InvalidOperand it might be some arithmetic instruction - // that does not update the condition codes. 
So try adding a CCOut operand
-    // with a value of reg0.
-    if (MatchResult == Match_InvalidOperand) {
-      Operands.insert(Operands.begin() + 1,
-                      ARMOperand::CreateCCOut(0,
-                      ((ARMOperand*)Operands[0])->getStartLoc()));
-      MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
-      if (MatchResult2 == Match_Success)
-        MatchResult = Match_Success;
-      else {
-        ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
-        Operands.erase(Operands.begin() + 1);
-        delete CCOut;
-      }
-    }
-    // If we get a Match_MnemonicFail it might be some arithmetic instruction
-    // that updates the condition codes if it ends in 's'. So see if the
-    // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
-    // operand with a value of CPSR.
-    else if (MatchResult == Match_MnemonicFail) {
-      // Get the instruction mnemonic, which is the first token.
-      StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
-      if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
-        // removed the 's' from the mnemonic for matching.
-        StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
-        SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
-        ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
-        Operands.erase(Operands.begin());
-        delete OldMnemonic;
-        Operands.insert(Operands.begin(),
-                        ARMOperand::CreateToken(MnemonicNoS, NameLoc));
-        Operands.insert(Operands.begin() + 1,
-                        ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
-        MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
-        if (MatchResult2 == Match_Success)
-          MatchResult = Match_Success;
-        else {
-          ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
-          Operands.erase(Operands.begin());
-          delete OldMnemonic;
-          Operands.insert(Operands.begin(),
-                          ARMOperand::CreateToken(Mnemonic, NameLoc));
-          ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
-          Operands.erase(Operands.begin() + 1);
-          delete CCOut;
-        }
-      }
-    }
-  }
   switch (MatchResult) {
+  default: break;
   case Match_Success:
+    // Context sensitive operand constraints aren't handled by the matcher,
+    // so check them here.
+    if (validateInstruction(Inst, Operands)) {
+      // Still progress the IT block, otherwise one wrong condition causes
+      // nasty cascading errors.
+      forwardITPosition();
+      return true;
+    }
+
+    // Some instructions need post-processing to, for example, tweak which
+    // encoding is selected.
+    processInstruction(Inst, Operands);
+
+    // Only move forward at the very end so that everything in validate
+    // and process gets a consistent answer about whether we're in an IT
+    // block.
+    forwardITPosition();
+
     Out.EmitInstruction(Inst);
     return false;
   case Match_MissingFeature:
@@ -2227,34 +4443,43 @@ MatchAndEmitInstruction(SMLoc IDLoc,
     return Error(ErrorLoc, "invalid operand for instruction");
   }
   case Match_MnemonicFail:
-    return Error(IDLoc, "unrecognized instruction mnemonic");
+    return Error(IDLoc, "invalid instruction");
   case Match_ConversionFail:
-    return Error(IDLoc, "unable to convert operands to instruction");
+    // The converter function will have already emitted a diagnostic.
+ return true; + case Match_RequiresNotITBlock: + return Error(IDLoc, "flag setting instruction only valid outside IT block"); + case Match_RequiresITBlock: + return Error(IDLoc, "instruction only valid inside IT block"); + case Match_RequiresV6: + return Error(IDLoc, "instruction variant requires ARMv6 or later"); + case Match_RequiresThumb2: + return Error(IDLoc, "instruction variant requires Thumb2"); } llvm_unreachable("Implement any new match types added!"); return true; } -/// ParseDirective parses the arm specific directives +/// parseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") - return ParseDirectiveWord(4, DirectiveID.getLoc()); + return parseDirectiveWord(4, DirectiveID.getLoc()); else if (IDVal == ".thumb") - return ParseDirectiveThumb(DirectiveID.getLoc()); + return parseDirectiveThumb(DirectiveID.getLoc()); else if (IDVal == ".thumb_func") - return ParseDirectiveThumbFunc(DirectiveID.getLoc()); + return parseDirectiveThumbFunc(DirectiveID.getLoc()); else if (IDVal == ".code") - return ParseDirectiveCode(DirectiveID.getLoc()); + return parseDirectiveCode(DirectiveID.getLoc()); else if (IDVal == ".syntax") - return ParseDirectiveSyntax(DirectiveID.getLoc()); + return parseDirectiveSyntax(DirectiveID.getLoc()); return true; } -/// ParseDirectiveWord +/// parseDirectiveWord /// ::= .word [ expression (, expression)* ] -bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { +bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -2277,9 +4502,9 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } -/// ParseDirectiveThumb +/// parseDirectiveThumb /// ::= .thumb -bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { +bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(L, "unexpected token in directive"); Parser.Lex(); @@ -2290,9 +4515,9 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { return false; } -/// ParseDirectiveThumbFunc +/// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name -bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { +bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); bool isMachO = MAI.hasSubsectionsViaSymbols(); StringRef Name; @@ -2322,9 +4547,9 @@ bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { return false; } -/// ParseDirectiveSyntax +/// parseDirectiveSyntax /// ::= .syntax unified | divided -bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { +bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); @@ -2345,9 +4570,9 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { return false; } -/// ParseDirectiveCode +/// parseDirectiveCode /// ::= .code 16 | 32 -bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { +bool ARMAsmParser::parseDirectiveCode(SMLoc L) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Integer)) return Error(L, "unexpected token in .code directive"); @@ -2380,8 +4605,8 @@ extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization. 
extern "C" void LLVMInitializeARMAsmParser() { - RegisterAsmParser<ARMAsmParser> X(TheARMTarget); - RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); + RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); LLVMInitializeARMAsmLexer(); } diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index bdce2c4..8f2f813 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -6,584 +6,4077 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// It contains code to implement the public interfaces of ARMDisassembler and -// ThumbDisassembler, both of which are instances of MCDisassembler. -// -//===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-disassembler" -#include "ARMDisassembler.h" -#include "ARMDisassemblerCore.h" - -#include "llvm/ADT/OwningPtr.h" +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -//#define DEBUG(X) do { X; } while (0) - -/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from -/// ARMDecoderEmitter.cpp TableGen backend. It contains: -/// -/// o Mappings from opcode to ARM/Thumb instruction format -/// -/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function -/// for an ARM instruction. -/// -/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding -/// function for a Thumb instruction. -/// -#include "ARMGenDecoderTables.inc" +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; +namespace { +/// ARMDisassembler - ARM disassembler for all ARM platforms. +class ARMDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + ARMDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { + } + + ~ARMDisassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; +private: +}; + +/// ThumbDisassembler - Thumb disassembler for all Thumb platforms. +class ThumbDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + ThumbDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { + } + + ~ThumbDisassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. 
+ EDInstInfo *getEDInfo() const; +private: + mutable std::vector<unsigned> ITBlock; + DecodeStatus AddThumbPredicate(MCInst&) const; + void UpdateThumbVFPPredicate(MCInst&) const; +}; +} + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. + return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + return false; +} + + +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. +static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, + unsigned Insn, + uint64_t Adddress, + const void *Decoder); 
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned 
Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val, + uint64_t Address, const void *Decoder); +static DecodeStatus 
DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + + + +#include "ARMGenDisassemblerTables.inc" +#include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" -using namespace llvm; +static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { + return new ARMDisassembler(STI); +} + +static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) { + return new ThumbDisassembler(STI); +} -/// showBitVector - Use the raw_ostream to log a diagnostic message describing -/// the inidividual bits of the instruction. -/// -static inline void showBitVector(raw_ostream &os, const uint32_t &insn) { - // Split the bit position markers into more than one lines to fit 80 columns. - os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11" - << " 10 9 8 7 6 5 4 3 2 1 0 \n"; - os << "---------------------------------------------------------------" - << "----------------------------------\n"; - os << '|'; - for (unsigned i = 32; i != 0; --i) { - if (insn >> (i - 1) & 0x01) - os << " 1"; +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} + +DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + assert(!(STI.getFeatureBits() & ARM::ModeThumb) && + "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a small-endian 32-bit word in the stream. 
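  // Worked example of the little-endian reassembly below: a stream holding the
  // bytes { 0x04, 0x10, 0x9F, 0xE5 } yields insn == 0xE59F1004, i.e. an
  // ARM-mode "ldr r1, [pc, #4]"; the most significant byte comes last in memory.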
+ uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Calling the auto-generated decoder function. + DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + // VFP and NEON instructions, similarly, are shared between ARM + // and Thumb modes. + MI.clear(); + result = decodeVFPInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + result = decodeNEONDataInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + result = decodeNEONDupInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + + Size = 0; + return MCDisassembler::Fail; +} + +namespace llvm { +extern MCInstrDesc ARMInsts[]; +} + +/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the +/// immediate Value in the MCInst. The immediate Value has had any PC +/// adjustment made by the caller. If the instruction is a branch instruction +/// then isBranch is true, else false. If the getOpInfo() function was set as +/// part of the setupForSymbolicDisassembly() call then that function is called +/// to get any symbolic information at the Address for this instruction. If +/// that returns non-zero then the symbolic information it returns is used to +/// create an MCExpr and that is added as an operand to the MCInst. If +/// getOpInfo() returns zero and isBranch is true then a symbol look up for +/// Value is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with Value is created. This function returns true if it adds an +/// operand to the MCInst and false otherwise. 
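// In outline, the operand built by this function is
//   Expr = (AddSymbol - SubtractSymbol) + Value
// with absent pieces dropped and folded to a plain constant when no symbols are
// present, then wrapped in :upper16:/:lower16: when the variant kind asks for it.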
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, + bool isBranch, uint64_t InstSize, + MCInst &MI, const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); + if (!getOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + void *DisInfo = Dis->getDisInfoBlock(); + if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (isBranch) { + LLVMSymbolLookupCallback SymbolLookUp = + Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + uint64_t ReferenceType; + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + SymbolicOp.Value = 0; + } + else { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + } + else { + return false; + } + } + else { + return false; + } + } + + MCContext *Ctx = Dis->getMCContext(); + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); else - os << " 0"; - os << (i%4 == 1 ? '|' : ':'); - } - os << '\n'; - // Split the bit position markers into more than one lines to fit 80 columns. - os << "---------------------------------------------------------------" - << "----------------------------------\n"; - os << '\n'; -} - -/// decodeARMInstruction is a decorator function which tries special cases of -/// instruction matching before calling the auto-generated decoder function. -static unsigned decodeARMInstruction(uint32_t &insn) { - if (slice(insn, 31, 28) == 15) - goto AutoGenedDecoder; - - // Special case processing, if any, goes here.... - - // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB. - // The insufficient encoding information of the combined instruction confuses - // the decoder wrt BFC/BFI. Therefore, we try to recover here. - // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111. - // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111. - if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) { - if (slice(insn, 3, 0) == 15) - return ARM::BFC; + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); else - return ARM::BFI; - } - - // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings - // A1 & A2. - // As a result, the decoder fails to deocode USAT properly. 
- if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) - return ARM::USAT; - // As a result, the decoder fails to deocode UQADD16 properly. - if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1) - return ARM::UQADD16; - - // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. - // As a result, the decoder fails to decode UMULL properly. - if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { - return ARM::UMULL; - } - - // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195. - // As a result, the decoder fails to decode SBFX properly. - if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5) - return ARM::SBFX; - - // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198. - // As a result, the decoder fails to decode UBFX properly. - if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5) - return ARM::UBFX; - - // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. - // As a result, the decoder fails to deocode SSAT properly. - if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) - return ARM::SSAT; - - // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. - // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. - if (slice(insn, 27, 24) == 0) { - switch (slice(insn, 21, 20)) { - case 2: - switch (slice(insn, 7, 4)) { - case 11: - return ARM::STRHT; - default: - break; // fallthrough + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert(0 && "bad SymbolicOp.VariantKind"); + + return true; +} + +/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being +/// referenced by a load instruction with the base register that is the Pc. +/// These can often be values in a literal pool near the Address of the +/// instruction. The Address of the instruction and its immediate Value are +/// used as a possible literal pool entry. The SymbolLookUp call back will +/// return the name of a symbol referenced by the the literal pool's entry if +/// the referenced address is that of a symbol. Or it will return a pointer to +/// a literal 'C' string if the referenced address of the literal pool's entry +/// is an address into a section with 'C' string literals. 
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, + const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + void *DisInfo = Dis->getDisInfoBlock(); + uint64_t ReferenceType; + ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; + const char *ReferenceName; + (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr || + ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + (*Dis->CommentStream) << "literal pool for: " << ReferenceName; + } +} + +// Thumb1 instructions don't have explicit S bits. Rather, they +// implicitly set CPSR. Since it's not represented in the encoding, the +// auto-generated decoder won't inject the CPSR operand. We need to fix +// that as a post-pass. +static void AddThumb1SBit(MCInst &MI, bool InITBlock) { + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + MCInst::iterator I = MI.begin(); + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (I == MI.end()) break; + if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) { + if (i > 0 && OpInfo[i-1].isPredicate()) continue; + MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR)); + return; + } + } + + MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR)); +} + +// Most Thumb instructions don't have explicit predicates in the +// encoding, but rather get their predicates from IT context. We need +// to fix up the predicate operands using this context information as a +// post-pass. +MCDisassembler::DecodeStatus +ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { + MCDisassembler::DecodeStatus S = Success; + + // A few instructions actually have predicates encoded in them. Don't + // try to overwrite it if we're seeing one of those. + switch (MI.getOpcode()) { + case ARM::tBcc: + case ARM::t2Bcc: + case ARM::tCBZ: + case ARM::tCBNZ: + case ARM::tCPS: + case ARM::t2CPS3p: + case ARM::t2CPS2p: + case ARM::t2CPS1p: + case ARM::tMOVSr: + case ARM::tSETEND: + // Some instructions (mostly conditional branches) are not + // allowed in IT blocks. + if (!ITBlock.empty()) + S = SoftFail; + else + return Success; + break; + case ARM::tB: + case ARM::t2B: + case ARM::t2TBB: + case ARM::t2TBH: + // Some instructions (mostly unconditional branches) can + // only appears at the end of, or outside of, an IT. + if (ITBlock.size() > 1) + S = SoftFail; + break; + default: + break; + } + + // If we're in an IT block, base the predicate on that. Otherwise, + // assume a predicate of AL. 
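  // For example, while decoding the body of an "itte eq" block, the first two
  // instructions pick up ARMCC::EQ here and the third picks up ARMCC::NE;
  // outside any IT block everything defaults to ARMCC::AL.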
+ unsigned CC; + if (!ITBlock.empty()) { + CC = ITBlock.back(); + if (CC == 0xF) + CC = ARMCC::AL; + ITBlock.pop_back(); + } else + CC = ARMCC::AL; + + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + MCInst::iterator I = MI.begin(); + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (I == MI.end()) break; + if (OpInfo[i].isPredicate()) { + I = MI.insert(I, MCOperand::CreateImm(CC)); + ++I; + if (CC == ARMCC::AL) + MI.insert(I, MCOperand::CreateReg(0)); + else + MI.insert(I, MCOperand::CreateReg(ARM::CPSR)); + return S; + } + } + + I = MI.insert(I, MCOperand::CreateImm(CC)); + ++I; + if (CC == ARMCC::AL) + MI.insert(I, MCOperand::CreateReg(0)); + else + MI.insert(I, MCOperand::CreateReg(ARM::CPSR)); + + return S; +} + +// Thumb VFP instructions are a special case. Because we share their +// encodings between ARM and Thumb modes, and they are predicable in ARM +// mode, the auto-generated decoder will give them an (incorrect) +// predicate operand. We need to rewrite these operands based on the IT +// context as a post-pass. +void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { + unsigned CC; + if (!ITBlock.empty()) { + CC = ITBlock.back(); + ITBlock.pop_back(); + } else + CC = ARMCC::AL; + + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + MCInst::iterator I = MI.begin(); + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (OpInfo[i].isPredicate() ) { + I->setImm(CC); + ++I; + if (CC == ARMCC::AL) + I->setReg(0); + else + I->setReg(ARM::CPSR); + return; + } + } +} + +DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + assert((STI.getFeatureBits() & ARM::ModeThumb) && + "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); + + // We want to read exactly 2 bytes of data. + if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + uint16_t insn16 = (bytes[1] << 8) | bytes[0]; + DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 2; + Check(result, AddThumbPredicate(MI)); + return result; + } + + MI.clear(); + result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI); + if (result) { + Size = 2; + bool InITBlock = !ITBlock.empty(); + Check(result, AddThumbPredicate(MI)); + AddThumb1SBit(MI, InITBlock); + return result; + } + + MI.clear(); + result = decodeThumb2Instruction16(MI, insn16, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 2; + + // Nested IT blocks are UNPREDICTABLE. Must be checked before we add + // the Thumb predicate. + if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty()) + result = MCDisassembler::SoftFail; + + Check(result, AddThumbPredicate(MI)); + + // If we find an IT instruction, we need to parse its condition + // code and mask operands so that we can apply them correctly + // to the subsequent instructions. + if (MI.getOpcode() == ARM::t2IT) { + + // (3 - the number of trailing zeros) is the number of then / else. 
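      // For example, a mask of 0b1000 (three trailing zeros) means the IT block
      // covers only the instruction implied by firstcond, while a mask of 0b0110
      // (one trailing zero) adds two more then/else slots.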
+ unsigned firstcond = MI.getOperand(0).getImm(); + unsigned Mask = MI.getOperand(1).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + ITBlock.insert(ITBlock.begin(), firstcond); + else + ITBlock.insert(ITBlock.begin(), firstcond ^ 1); } + + ITBlock.push_back(firstcond); + } + + return result; + } + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + uint32_t insn32 = (bytes[3] << 8) | + (bytes[2] << 0) | + (bytes[1] << 24) | + (bytes[0] << 16); + MI.clear(); + result = decodeThumbInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + bool InITBlock = ITBlock.size(); + Check(result, AddThumbPredicate(MI)); + AddThumb1SBit(MI, InITBlock); + return result; + } + + MI.clear(); + result = decodeThumb2Instruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + + MI.clear(); + result = decodeVFPInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } + + MI.clear(); + result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + + if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) { + MI.clear(); + uint32_t NEONLdStInsn = insn32; + NEONLdStInsn &= 0xF0FFFFFF; + NEONLdStInsn |= 0x04000000; + result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + + if (fieldFromInstruction32(insn32, 24, 4) == 0xF) { + MI.clear(); + uint32_t NEONDataInsn = insn32; + NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 + NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 + NEONDataInsn |= 0x12000000; // Set bits 28 and 25 + result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + + Size = 0; + return MCDisassembler::Fail; +} + + +extern "C" void LLVMInitializeARMDisassembler() { + TargetRegistry::RegisterMCDisassembler(TheARMTarget, + createARMDisassembler); + TargetRegistry::RegisterMCDisassembler(TheThumbTarget, + createThumbDisassembler); +} + +static const unsigned GPRDecoderTable[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, ARM::R11, + ARM::R12, ARM::SP, ARM::LR, ARM::PC +}; + +static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + unsigned Register = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo == 15) return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, 
unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = ARM::R0; + break; + case 1: + Register = ARM::R1; + break; + case 2: + Register = ARM::R2; break; case 3: - switch (slice(insn, 7, 4)) { - case 11: - return ARM::LDRHT; - case 13: - return ARM::LDRSBT; - case 15: - return ARM::LDRSHT; - default: - break; // fallthrough - } + Register = ARM::R3; + break; + case 9: + Register = ARM::R9; + break; + case 12: + Register = ARM::R12; break; default: - break; // fallthrough + return MCDisassembler::Fail; } + + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static const unsigned SPRDecoderTable[] = { + ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15, + ARM::S16, ARM::S17, ARM::S18, ARM::S19, + ARM::S20, ARM::S21, ARM::S22, ARM::S23, + ARM::S24, ARM::S25, ARM::S26, ARM::S27, + ARM::S28, ARM::S29, ARM::S30, ARM::S31 +}; + +static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Register = SPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static const unsigned DPRDecoderTable[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D12, ARM::D13, ARM::D14, ARM::D15, + ARM::D16, ARM::D17, ARM::D18, ARM::D19, + ARM::D20, ARM::D21, ARM::D22, ARM::D23, + ARM::D24, ARM::D25, ARM::D26, ARM::D27, + ARM::D28, ARM::D29, ARM::D30, ARM::D31 +}; + +static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Register = DPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus +DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static const unsigned QPRDecoderTable[] = { + ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, + ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, + ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, + ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15 +}; + + +static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + RegNo >>= 1; + + unsigned Register = QPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return 
MCDisassembler::Success; +} + +static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val == 0xF) return MCDisassembler::Fail; + // AL predicate is not allowed on Thumb1 branches. + if (Inst.getOpcode() == ARM::tBcc && Val == 0xE) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Val)); + if (Val == ARMCC::AL) { + Inst.addOperand(MCOperand::CreateReg(0)); + } else + Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val) + Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + else + Inst.addOperand(MCOperand::CreateReg(0)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + uint32_t imm = Val & 0xFF; + uint32_t rot = (Val & 0xF00) >> 7; + uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F)); + Inst.addOperand(MCOperand::CreateImm(rot_imm)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned imm = fieldFromInstruction32(Val, 7, 5); + + // Register-immediate + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::ShiftOpc Shift = ARM_AM::lsl; + switch (type) { + case 0: + Shift = ARM_AM::lsl; + break; + case 1: + Shift = ARM_AM::lsr; + break; + case 2: + Shift = ARM_AM::asr; + break; + case 3: + Shift = ARM_AM::ror; + break; } - // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153. - // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST - // properly. 
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) { - unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); - switch (slice(insn, 7, 4)) { - case 11: - switch (PW) { - case 2: // Offset - return ARM::STRH; - case 3: // Pre-indexed - return ARM::STRH_PRE; - case 0: // Post-indexed - return ARM::STRH_POST; - default: - break; // fallthrough - } + if (Shift == ARM_AM::ror && imm == 0) + Shift = ARM_AM::rrx; + + unsigned Op = Shift | (imm << 3); + Inst.addOperand(MCOperand::CreateImm(Op)); + + return S; +} + +static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned Rs = fieldFromInstruction32(Val, 8, 4); + + // Register-register + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::ShiftOpc Shift = ARM_AM::lsl; + switch (type) { + case 0: + Shift = ARM_AM::lsl; break; - case 13: - switch (PW) { - case 2: // Offset - return ARM::LDRD; - case 3: // Pre-indexed - return ARM::LDRD_PRE; - case 0: // Post-indexed - return ARM::LDRD_POST; - default: - break; // fallthrough - } + case 1: + Shift = ARM_AM::lsr; break; - case 15: - switch (PW) { - case 2: // Offset - return ARM::STRD; - case 3: // Pre-indexed - return ARM::STRD_PRE; - case 0: // Post-indexed - return ARM::STRD_POST; - default: - break; // fallthrough - } + case 2: + Shift = ARM_AM::asr; break; + case 3: + Shift = ARM_AM::ror; + break; + } + + Inst.addOperand(MCOperand::CreateImm(Shift)); + + return S; +} + +static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + bool writebackLoad = false; + unsigned writebackReg = 0; + switch (Inst.getOpcode()) { default: - break; // fallthrough + break; + case ARM::LDMIA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::LDMDA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + writebackLoad = true; + writebackReg = Inst.getOperand(0).getReg(); + break; + } + + // Empty register lists are not allowed. + if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; + for (unsigned i = 0; i < 16; ++i) { + if (Val & (1 << i)) { + if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) + return MCDisassembler::Fail; + // Writeback not allowed if Rn is in the target list. + if (writebackLoad && writebackReg == Inst.end()[-1].getReg()) + Check(S, MCDisassembler::SoftFail); } } - // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153. - // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST - // properly. 
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) { - unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); - switch (slice(insn, 7, 4)) { - case 11: - switch (PW) { - case 2: // Offset - return ARM::LDRH; - case 3: // Pre-indexed - return ARM::LDRH_PRE; - case 0: // Post-indexed - return ARM::LDRH_POST; - default: - break; // fallthrough - } + return S; +} + +static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Vd = fieldFromInstruction32(Val, 8, 4); + unsigned regs = Val & 0xFF; + + if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + for (unsigned i = 0; i < (regs - 1); ++i) { + if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Vd = fieldFromInstruction32(Val, 8, 4); + unsigned regs = (Val & 0xFF) / 2; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + for (unsigned i = 0; i < (regs - 1); ++i) { + if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + // This operand encodes a mask of contiguous zeros between a specified MSB + // and LSB. To decode it, we create the mask of all bits MSB-and-lower, + // the mask of all bits LSB-and-lower, and then xor them to create + // the mask of that's all ones on [msb, lsb]. Finally we not it to + // create the final mask. 
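  // Worked example: msb = 7, lsb = 4 gives msb_mask = 0x000000FF and
  // lsb_mask = 0x0000000F; msb_mask ^ lsb_mask = 0x000000F0, so the inverted
  // value 0xFFFFFF0F is the operand added below (zeros exactly on [7, 4]).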
+ unsigned msb = fieldFromInstruction32(Val, 5, 5); + unsigned lsb = fieldFromInstruction32(Val, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + if (lsb > msb) Check(S, MCDisassembler::SoftFail); + + uint32_t msb_mask = 0xFFFFFFFF; + if (msb != 31) msb_mask = (1U << (msb+1)) - 1; + uint32_t lsb_mask = (1U << lsb) - 1; + + Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask))); + return S; +} + +static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned CRd = fieldFromInstruction32(Insn, 12, 4); + unsigned coproc = fieldFromInstruction32(Insn, 8, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 8); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + + switch (Inst.getOpcode()) { + case ARM::LDC_OFFSET: + case ARM::LDC_PRE: + case ARM::LDC_POST: + case ARM::LDC_OPTION: + case ARM::LDCL_OFFSET: + case ARM::LDCL_PRE: + case ARM::LDCL_POST: + case ARM::LDCL_OPTION: + case ARM::STC_OFFSET: + case ARM::STC_PRE: + case ARM::STC_POST: + case ARM::STC_OPTION: + case ARM::STCL_OFFSET: + case ARM::STCL_PRE: + case ARM::STCL_POST: + case ARM::STCL_OPTION: + case ARM::t2LDC_OFFSET: + case ARM::t2LDC_PRE: + case ARM::t2LDC_POST: + case ARM::t2LDC_OPTION: + case ARM::t2LDCL_OFFSET: + case ARM::t2LDCL_PRE: + case ARM::t2LDCL_POST: + case ARM::t2LDCL_OPTION: + case ARM::t2STC_OFFSET: + case ARM::t2STC_PRE: + case ARM::t2STC_POST: + case ARM::t2STC_OPTION: + case ARM::t2STCL_OFFSET: + case ARM::t2STCL_PRE: + case ARM::t2STCL_POST: + case ARM::t2STCL_OPTION: + if (coproc == 0xA || coproc == 0xB) + return MCDisassembler::Fail; + break; + default: break; - case 13: - switch (PW) { - case 2: // Offset - return ARM::LDRSB; - case 3: // Pre-indexed - return ARM::LDRSB_PRE; - case 0: // Post-indexed - return ARM::LDRSB_POST; + } + + Inst.addOperand(MCOperand::CreateImm(coproc)); + Inst.addOperand(MCOperand::CreateImm(CRd)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + + bool writeback = (P == 0) || (W == 1); + unsigned idx_mode = 0; + if (P && writeback) + idx_mode = ARMII::IndexModePre; + else if (!P && writeback) + idx_mode = ARMII::IndexModePost; + + switch (Inst.getOpcode()) { + case ARM::t2LDC2_OFFSET: + case ARM::t2LDC2L_OFFSET: + case ARM::t2LDC2_PRE: + case ARM::t2LDC2L_PRE: + case ARM::t2STC2_OFFSET: + case ARM::t2STC2L_OFFSET: + case ARM::t2STC2_PRE: + case ARM::t2STC2L_PRE: + case ARM::LDC2_OFFSET: + case ARM::LDC2L_OFFSET: + case ARM::LDC2_PRE: + case ARM::LDC2L_PRE: + case ARM::STC2_OFFSET: + case ARM::STC2L_OFFSET: + case ARM::STC2_PRE: + case ARM::STC2L_PRE: + case ARM::t2LDC_OFFSET: + case ARM::t2LDCL_OFFSET: + case ARM::t2LDC_PRE: + case ARM::t2LDCL_PRE: + case ARM::t2STC_OFFSET: + case ARM::t2STCL_OFFSET: + case ARM::t2STC_PRE: + case ARM::t2STCL_PRE: + case ARM::LDC_OFFSET: + case ARM::LDCL_OFFSET: + case ARM::LDC_PRE: + case ARM::LDCL_PRE: + case ARM::STC_OFFSET: + case ARM::STCL_OFFSET: + case ARM::STC_PRE: + case ARM::STCL_PRE: + imm = ARM_AM::getAM5Opc(U ? 
ARM_AM::add : ARM_AM::sub, imm); + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + case ARM::t2LDC2_POST: + case ARM::t2LDC2L_POST: + case ARM::t2STC2_POST: + case ARM::t2STC2L_POST: + case ARM::LDC2_POST: + case ARM::LDC2L_POST: + case ARM::STC2_POST: + case ARM::STC2L_POST: + case ARM::t2LDC_POST: + case ARM::t2LDCL_POST: + case ARM::t2STC_POST: + case ARM::t2STCL_POST: + case ARM::LDC_POST: + case ARM::LDCL_POST: + case ARM::STC_POST: + case ARM::STCL_POST: + imm |= U << 8; + // fall through. + default: + // The 'option' variant doesn't encode 'U' in the immediate since + // the immediate is unsigned [0,255]. + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + } + + switch (Inst.getOpcode()) { + case ARM::LDC_OFFSET: + case ARM::LDC_PRE: + case ARM::LDC_POST: + case ARM::LDC_OPTION: + case ARM::LDCL_OFFSET: + case ARM::LDCL_PRE: + case ARM::LDCL_POST: + case ARM::LDCL_OPTION: + case ARM::STC_OFFSET: + case ARM::STC_PRE: + case ARM::STC_POST: + case ARM::STC_OPTION: + case ARM::STCL_OFFSET: + case ARM::STCL_PRE: + case ARM::STCL_POST: + case ARM::STCL_OPTION: + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus +DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned reg = fieldFromInstruction32(Insn, 25, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + + // On stores, the writeback operand precedes Rt. + switch (Inst.getOpcode()) { + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRT_POST_REG: + case ARM::STRT_POST_IMM: + case ARM::STRBT_POST_REG: + case ARM::STRBT_POST_IMM: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + + // On loads, the writeback operand comes after Rt. 
+ switch (Inst.getOpcode()) { + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRBT_POST_REG: + case ARM::LDRBT_POST_IMM: + case ARM::LDRT_POST_REG: + case ARM::LDRT_POST_IMM: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::AddrOpc Op = ARM_AM::add; + if (!fieldFromInstruction32(Insn, 23, 1)) + Op = ARM_AM::sub; + + bool writeback = (P == 0) || (W == 1); + unsigned idx_mode = 0; + if (P && writeback) + idx_mode = ARMII::IndexModePre; + else if (!P && writeback) + idx_mode = ARMII::IndexModePost; + + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; // UNPREDICTABLE + + if (reg) { + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + ARM_AM::ShiftOpc Opc = ARM_AM::lsl; + switch( fieldFromInstruction32(Insn, 5, 2)) { + case 0: + Opc = ARM_AM::lsl; + break; + case 1: + Opc = ARM_AM::lsr; + break; + case 2: + Opc = ARM_AM::asr; + break; + case 3: + Opc = ARM_AM::ror; + break; default: - break; // fallthrough - } + return MCDisassembler::Fail; + } + unsigned amt = fieldFromInstruction32(Insn, 7, 5); + unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode); + + Inst.addOperand(MCOperand::CreateImm(imm)); + } else { + Inst.addOperand(MCOperand::CreateReg(0)); + unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode); + Inst.addOperand(MCOperand::CreateImm(tmp)); + } + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned imm = fieldFromInstruction32(Val, 7, 5); + unsigned U = fieldFromInstruction32(Val, 12, 1); + + ARM_AM::ShiftOpc ShOp = ARM_AM::lsl; + switch (type) { + case 0: + ShOp = ARM_AM::lsl; + break; + case 1: + ShOp = ARM_AM::lsr; + break; + case 2: + ShOp = ARM_AM::asr; + break; + case 3: + ShOp = ARM_AM::ror; + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + unsigned shift; + if (U) + shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp); + else + shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp); + Inst.addOperand(MCOperand::CreateImm(shift)); + + return S; +} + +static DecodeStatus +DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned type = fieldFromInstruction32(Insn, 22, 1); + unsigned imm = fieldFromInstruction32(Insn, 8, 4); + unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + + bool writeback = (W == 1) | (P == 0); + + // For {LD,ST}RD, Rt must 
be even, else undefined. + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (Rt & 0x1) return MCDisassembler::Fail; + break; + default: + break; + } + + if (writeback) { // Writeback + if (P) + U |= ARMII::IndexModePre << 9; + else + U |= ARMII::IndexModePost << 9; + + // On stores, the writeback operand precedes Rt. + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; break; - case 15: - switch (PW) { - case 2: // Offset - return ARM::LDRSH; - case 3: // Pre-indexed - return ARM::LDRSH_PRE; - case 0: // Post-indexed - return ARM::LDRSH_POST; + default: + break; + } + + if (writeback) { + // On loads, the writeback operand comes after Rt. + switch (Inst.getOpcode()) { + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + case ARM::LDRHTr: + case ARM::LDRSBTr: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (type) { + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm)); + } else { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(U)); + } + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned mode = fieldFromInstruction32(Insn, 23, 2); + + switch (mode) { + case 0: + mode = ARM_AM::da; + break; + case 1: + mode = ARM_AM::ia; + break; + case 2: + mode = ARM_AM::db; + break; + case 3: + mode = ARM_AM::ib; + break; + } + + Inst.addOperand(MCOperand::CreateImm(mode)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned reglist = fieldFromInstruction32(Insn, 0, 16); + + if (pred == 0xF) { + switch (Inst.getOpcode()) { + case ARM::LDMDA: + Inst.setOpcode(ARM::RFEDA); + break; + case ARM::LDMDA_UPD: + Inst.setOpcode(ARM::RFEDA_UPD); + break; + case ARM::LDMDB: + Inst.setOpcode(ARM::RFEDB); + break; + case 
ARM::LDMDB_UPD: + Inst.setOpcode(ARM::RFEDB_UPD); + break; + case ARM::LDMIA: + Inst.setOpcode(ARM::RFEIA); + break; + case ARM::LDMIA_UPD: + Inst.setOpcode(ARM::RFEIA_UPD); + break; + case ARM::LDMIB: + Inst.setOpcode(ARM::RFEIB); + break; + case ARM::LDMIB_UPD: + Inst.setOpcode(ARM::RFEIB_UPD); + break; + case ARM::STMDA: + Inst.setOpcode(ARM::SRSDA); + break; + case ARM::STMDA_UPD: + Inst.setOpcode(ARM::SRSDA_UPD); + break; + case ARM::STMDB: + Inst.setOpcode(ARM::SRSDB); + break; + case ARM::STMDB_UPD: + Inst.setOpcode(ARM::SRSDB_UPD); + break; + case ARM::STMIA: + Inst.setOpcode(ARM::SRSIA); + break; + case ARM::STMIA_UPD: + Inst.setOpcode(ARM::SRSIA_UPD); + break; + case ARM::STMIB: + Inst.setOpcode(ARM::SRSIB); + break; + case ARM::STMIB_UPD: + Inst.setOpcode(ARM::SRSIB_UPD); + break; default: - break; // fallthrough - } + if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail; + } + + // For stores (which become SRS's), the only operand is the mode. + if (fieldFromInstruction32(Insn, 20, 1) == 0) { + Inst.addOperand( + MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4))); + return S; + } + + return DecodeRFEInstruction(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; // Tied + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction32(Insn, 18, 2); + unsigned M = fieldFromInstruction32(Insn, 17, 1); + unsigned iflags = fieldFromInstruction32(Insn, 6, 3); + unsigned mode = fieldFromInstruction32(Insn, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + + // imod == '01' --> UNPREDICTABLE + // NOTE: Even though this is technically UNPREDICTABLE, we choose to + // return failure here. The '01' imod value is unprintable, so there's + // nothing useful we could do even if we returned UNPREDICTABLE. + + if (imod == 1) return MCDisassembler::Fail; + + if (imod && M) { + Inst.setOpcode(ARM::CPS3p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + Inst.addOperand(MCOperand::CreateImm(mode)); + } else if (imod && !M) { + Inst.setOpcode(ARM::CPS2p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + if (mode) S = MCDisassembler::SoftFail; + } else if (!imod && M) { + Inst.setOpcode(ARM::CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + if (iflags) S = MCDisassembler::SoftFail; + } else { + // imod == '00' && M == '0' --> UNPREDICTABLE + Inst.setOpcode(ARM::CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + S = MCDisassembler::SoftFail; + } + + return S; +} + +static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction32(Insn, 9, 2); + unsigned M = fieldFromInstruction32(Insn, 8, 1); + unsigned iflags = fieldFromInstruction32(Insn, 5, 3); + unsigned mode = fieldFromInstruction32(Insn, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + + // imod == '01' --> UNPREDICTABLE + // NOTE: Even though this is technically UNPREDICTABLE, we choose to + // return failure here. 
The '01' imod value is unprintable, so there's + // nothing useful we could do even if we returned UNPREDICTABLE. + + if (imod == 1) return MCDisassembler::Fail; + + if (imod && M) { + Inst.setOpcode(ARM::t2CPS3p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + Inst.addOperand(MCOperand::CreateImm(mode)); + } else if (imod && !M) { + Inst.setOpcode(ARM::t2CPS2p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + if (mode) S = MCDisassembler::SoftFail; + } else if (!imod && M) { + Inst.setOpcode(ARM::t2CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + if (iflags) S = MCDisassembler::SoftFail; + } else { + // imod == '00' && M == '0' --> UNPREDICTABLE + Inst.setOpcode(ARM::t2CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + S = MCDisassembler::SoftFail; + } + + return S; +} + +static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 8, 4); + unsigned imm = 0; + + imm |= (fieldFromInstruction32(Insn, 0, 8) << 0); + imm |= (fieldFromInstruction32(Insn, 12, 3) << 8); + imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); + imm |= (fieldFromInstruction32(Insn, 26, 1) << 11); + + if (Inst.getOpcode() == ARM::t2MOVTi16) + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned imm = 0; + + imm |= (fieldFromInstruction32(Insn, 0, 12) << 0); + imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); + + if (Inst.getOpcode() == ARM::MOVTi16) + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(imm)); + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 8, 4); + unsigned Ra = fieldFromInstruction32(Insn, 12, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder))) + return 
MCDisassembler::Fail; + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned add = fieldFromInstruction32(Val, 12, 1); + unsigned imm = fieldFromInstruction32(Val, 0, 12); + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (!add) imm *= -1; + if (imm == 0 && !add) imm = INT32_MIN; + Inst.addOperand(MCOperand::CreateImm(imm)); + if (Rn == 15) + tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder); + + return S; +} + +static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned U = fieldFromInstruction32(Val, 8, 1); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (U) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm))); + + return S; +} + +static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); +} + +static DecodeStatus +DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2; + + if (pred == 0xF) { + Inst.setOpcode(ARM::BLXi); + imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); + return S; + } + + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, + 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + + +static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned align = fieldFromInstruction32(Val, 4, 2); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!align) + Inst.addOperand(MCOperand::CreateImm(0)); + else + Inst.addOperand(MCOperand::CreateImm(4 << align)); + + return S; +} + +static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 
4); + + // First output register + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + // Second output register + switch (Inst.getOpcode()) { + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD1d8T: + case ARM::VLD1d16T: + case ARM::VLD1d32T: + case ARM::VLD1d64T: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; break; + case ARM::VLD2b8: + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2b8_UPD: + case ARM::VLD2b16_UPD: + case ARM::VLD2b32_UPD: + case ARM::VLD3q8: + case ARM::VLD3q16: + case ARM::VLD3q32: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; default: - break; // fallthrough + break; + } + + // Third output register + switch(Inst.getOpcode()) { + case ARM::VLD1d8T: + case ARM::VLD1d16T: + case ARM::VLD1d32T: + case ARM::VLD1d64T: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD3q8: + case ARM::VLD3q16: + case ARM::VLD3q32: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Fourth output register + switch (Inst.getOpcode()) { + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case 
ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Writeback operand + switch (Inst.getOpcode()) { + case ARM::VLD1d8_UPD: + case ARM::VLD1d16_UPD: + case ARM::VLD1d32_UPD: + case ARM::VLD1d64_UPD: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD2b8_UPD: + case ARM::VLD2b16_UPD: + case ARM::VLD2b32_UPD: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // AddrMode6 Base (register+alignment) + if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + // AddrMode6 Offset (register) + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + // Writeback Operand + switch (Inst.getOpcode()) { + case ARM::VST1d8_UPD: + case ARM::VST1d16_UPD: + case ARM::VST1d32_UPD: + case ARM::VST1d64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2d8_UPD: + case ARM::VST2d16_UPD: + case ARM::VST2d32_UPD: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST2b8_UPD: + case ARM::VST2b16_UPD: + case ARM::VST2b32_UPD: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4d8_UPD: + case 
ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // AddrMode6 Base (register+alignment) + if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + // AddrMode6 Offset (register) + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + // First input register + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + // Second input register + switch (Inst.getOpcode()) { + case ARM::VST1q8: + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + case ARM::VST1d8T: + case ARM::VST1d16T: + case ARM::VST1d32T: + case ARM::VST1d64T: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2d8: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8_UPD: + case ARM::VST2d16_UPD: + case ARM::VST2d32_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST2b8: + case ARM::VST2b16: + case ARM::VST2b32: + case ARM::VST2b8_UPD: + case ARM::VST2b16_UPD: + case ARM::VST2b32_UPD: + case ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Third input register + switch (Inst.getOpcode()) { + case ARM::VST1d8T: + case ARM::VST1d16T: + case ARM::VST1d32T: + case ARM::VST1d64T: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 
ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Fourth input register + switch (Inst.getOpcode()) { + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned align = fieldFromInstruction32(Insn, 4, 1); + unsigned size = fieldFromInstruction32(Insn, 6, 2); + unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; + + align *= (1 << size); + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (regs == 2) { + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; + } + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned align = fieldFromInstruction32(Insn, 4, 1); + unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + align *= 2*size; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(0)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned size = fieldFromInstruction32(Insn, 6, 2); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned align = fieldFromInstruction32(Insn, 4, 1); + + if (size == 0x3) { + size = 4; + align = 16; + } else { + if (size == 2) { + size = 1 << size; + align *= 8; + } else { + size = 1 << size; + align *= 4*size; } } -AutoGenedDecoder: - // Calling the auto-generated decoder function. - return decodeInstruction(insn); + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; } -// Helper function for special case handling of LDR (literal) and friends. -// See, for example, A6.3.7 Load word: Table A6-18 Load word. -// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode -// before returning it. 
-static unsigned T2Morph2LoadLiteral(unsigned Opcode) { - switch (Opcode) { - default: - return Opcode; // Return unmorphed opcode. +static DecodeStatus +DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned imm = fieldFromInstruction32(Insn, 0, 4); + imm |= fieldFromInstruction32(Insn, 16, 3) << 4; + imm |= fieldFromInstruction32(Insn, 24, 1) << 7; + imm |= fieldFromInstruction32(Insn, 8, 4) << 8; + imm |= fieldFromInstruction32(Insn, 5, 1) << 12; + unsigned Q = fieldFromInstruction32(Insn, 6, 1); + + if (Q) { + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } else { + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } + + Inst.addOperand(MCOperand::CreateImm(imm)); + + switch (Inst.getOpcode()) { + case ARM::VORRiv4i16: + case ARM::VORRiv2i32: + case ARM::VBICiv4i16: + case ARM::VBICiv2i32: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VORRiv8i16: + case ARM::VORRiv4i32: + case ARM::VBICiv8i16: + case ARM::VBICiv4i32: + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 18, 2); + + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(8 << size)); + + return S; +} + +static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(8 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDR_POST: case ARM::t2LDR_PRE: - case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: case ARM::t2LDRT: - return ARM::t2LDRpci; +static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(16 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: - case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: case ARM::t2LDRBT: - return ARM::t2LDRBpci; +static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(32 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: - case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: case ARM::t2LDRHT: - return ARM::t2LDRHpci; +static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: - case ARM::t2LDRSBi12: case 
ARM::t2LDRSBi8: - case ARM::t2LDRSBs: case ARM::t2LDRSBT: - return ARM::t2LDRSBpci; +static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 7, 1) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + unsigned op = fieldFromInstruction32(Insn, 6, 1); + unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (op) { + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; // Writeback + } - case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: - case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: case ARM::t2LDRSHT: - return ARM::t2LDRSHpci; + for (unsigned i = 0; i < length; ++i) { + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) + return MCDisassembler::Fail; } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -// Helper function for special case handling of PLD (literal) and friends. -// See A8.6.117 T1 & T2 and friends for why we morphed the opcode -// before returning it. -static unsigned T2Morph2PLDLiteral(unsigned Opcode) { - switch (Opcode) { - default: - return Opcode; // Return unmorphed opcode. - - case ARM::t2PLDi8: case ARM::t2PLDs: - case ARM::t2PLDWi12: case ARM::t2PLDWi8: - case ARM::t2PLDWs: - return ARM::t2PLDi12; - - case ARM::t2PLIi8: case ARM::t2PLIs: - return ARM::t2PLIi12; - } -} - -/// decodeThumbSideEffect is a decorator function which can potentially twiddle -/// the instruction or morph the returned opcode under Thumb2. -/// -/// First it checks whether the insn is a NEON or VFP instr; if true, bit -/// twiddling could be performed on insn to turn it into an ARM NEON/VFP -/// equivalent instruction and decodeInstruction is called with the transformed -/// insn. -/// -/// Next, there is special handling for Load byte/halfword/word instruction by -/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded -/// Thumb2 instruction. See comments below for further details. -/// -/// Finally, one last check is made to see whether the insn is a NEON/VFP and -/// decodeInstruction(insn) is invoked on the original insn. -/// -/// Otherwise, decodeThumbInstruction is called with the original insn. -static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { - if (IsThumb2) { - uint16_t op1 = slice(insn, 28, 27); - uint16_t op2 = slice(insn, 26, 20); - - // A6.3 32-bit Thumb instruction encoding - // Table A6-9 32-bit Thumb instruction encoding - - // The coprocessor instructions of interest are transformed to their ARM - // equivalents. - - // --------- Transform Begin Marker --------- - if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) { - // A7.4 Advanced SIMD data-processing instructions - // U bit of Thumb corresponds to Inst{24} of ARM. 
- uint16_t U = slice(op1, 1, 1); - - // Inst{28-24} of ARM = {1,0,0,1,U}; - uint16_t bits28_24 = 9 << 1 | U; - DEBUG(showBitVector(errs(), insn)); - setSlice(insn, 28, 24, bits28_24); - return decodeInstruction(insn); +static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned dst = fieldFromInstruction16(Insn, 8, 3); + unsigned imm = fieldFromInstruction16(Insn, 0, 8); + + if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder))) + return MCDisassembler::Fail; + + switch(Inst.getOpcode()) { + default: + return MCDisassembler::Fail; + case ARM::tADR: + break; // tADR does not explicitly represent the PC as an operand. + case ARM::tADDrSPi: + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + break; + } + + Inst.addOperand(MCOperand::CreateImm(imm)); + return S; +} + +static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 0, 3); + unsigned Rm = fieldFromInstruction32(Val, 3, 3); + + if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 0, 3); + unsigned imm = fieldFromInstruction32(Val, 3, 5); + + if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + unsigned imm = Val << 2; + + Inst.addOperand(MCOperand::CreateImm(imm)); + tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateImm(Val)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 6, 4); + unsigned Rm = fieldFromInstruction32(Val, 2, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 2); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, 
Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + switch (Inst.getOpcode()) { + case ARM::t2PLDs: + case ARM::t2PLDWs: + case ARM::t2PLIs: + break; + default: { + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; } + } - if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) { - // A7.7 Advanced SIMD element or structure load/store instructions - // Inst{27-24} of Thumb = 0b1001 - // Inst{27-24} of ARM = 0b0100 - DEBUG(showBitVector(errs(), insn)); - setSlice(insn, 27, 24, 4); - return decodeInstruction(insn); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + if (Rn == 0xF) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBs: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHs: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHs: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2PLDs: + Inst.setOpcode(ARM::t2PLDi12); + Inst.addOperand(MCOperand::CreateReg(ARM::PC)); + break; + default: + return MCDisassembler::Fail; } - // --------- Transform End Marker --------- - - unsigned unmorphed = decodeThumbInstruction(insn); - - // See, for example, A6.3.7 Load word: Table A6-18 Load word. - // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode - // before returning it to our caller. - if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 - && slice(insn, 19, 16) == 15) { - unsigned morphed = T2Morph2LoadLiteral(unmorphed); - if (morphed != unmorphed) - return morphed; + + int imm = fieldFromInstruction32(Insn, 0, 12); + if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; + } + + unsigned addrmode = fieldFromInstruction32(Insn, 4, 2); + addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2; + addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6; + if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + int imm = Val & 0xFF; + if (!(Val & 0x100)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm << 2)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 9); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 8, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus 
DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + int imm = Val & 0xFF; + if (Val == 0) + imm = INT32_MIN; + else if (!(Val & 0x100)) + imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 9); + + // Some instructions always use an additive offset. + switch (Inst.getOpcode()) { + case ARM::t2LDRT: + case ARM::t2LDRBT: + case ARM::t2LDRHT: + case ARM::t2LDRSBT: + case ARM::t2LDRSHT: + case ARM::t2STRT: + case ARM::t2STRBT: + case ARM::t2STRHT: + imm |= 0x100; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + addr |= fieldFromInstruction32(Insn, 9, 1) << 8; + addr |= Rn << 9; + unsigned load = fieldFromInstruction32(Insn, 20, 1); + + if (!load) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + + if (load) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 12); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + + +static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + unsigned imm = fieldFromInstruction16(Insn, 0, 7); + + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateImm(imm)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (Inst.getOpcode() == ARM::tADDrSP) { + unsigned Rdm = fieldFromInstruction16(Insn, 0, 3); + Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + } else if (Inst.getOpcode() == ARM::tADDspr) { + unsigned Rm = fieldFromInstruction16(Insn, 3, 4); + + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + if 
(!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; + unsigned flags = fieldFromInstruction16(Insn, 0, 3); + + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(flags)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned add = fieldFromInstruction32(Insn, 4, 1); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(add)); + + return S; +} + +static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (!tryAddingSymbolicOperand(Address, + (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val == 0xA || Val == 0xB) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + if (Rn == ARM::SP) S = MCDisassembler::SoftFail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus +DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 22, 4); + if (pred == 0xE || pred == 0xF) { + unsigned opc = fieldFromInstruction32(Insn, 4, 28); + switch (opc) { + default: + return MCDisassembler::Fail; + case 0xf3bf8f4: + Inst.setOpcode(ARM::t2DSB); + break; + case 0xf3bf8f5: + Inst.setOpcode(ARM::t2DMB); + break; + case 0xf3bf8f6: + Inst.setOpcode(ARM::t2ISB); + break; } - // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for - // why we morphed the opcode before returning it to our caller. 
- if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF - && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1 - && slice(insn, 19, 16) == 15) { - unsigned morphed = T2Morph2PLDLiteral(unmorphed); - if (morphed != unmorphed) - return morphed; + unsigned imm = fieldFromInstruction32(Insn, 0, 4); + return DecodeMemBarrierOption(Inst, imm, Address, Decoder); + } + + unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1; + brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19; + brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18; + brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12; + brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20; + + if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +// Decode a shifted immediate operand. These basically consist +// of an 8-bit value, and a 4-bit directive that specifies either +// a splat operation or a rotation. +static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + unsigned ctrl = fieldFromInstruction32(Val, 10, 2); + if (ctrl == 0) { + unsigned byte = fieldFromInstruction32(Val, 8, 2); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + switch (byte) { + case 0: + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + case 1: + Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm)); + break; + case 2: + Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8))); + break; + case 3: + Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) | + (imm << 8) | imm)); + break; } + } else { + unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80; + unsigned rot = fieldFromInstruction32(Val, 7, 5); + unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31)); + Inst.addOperand(MCOperand::CreateImm(imm)); + } - // One last check for NEON/VFP instructions. - if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) - return decodeInstruction(insn); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder){ + Inst.addOperand(MCOperand::CreateImm(Val << 1)); + return MCDisassembler::Success; +} - // Fall through. +static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder){ + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + switch (Val) { + default: + return MCDisassembler::Fail; + case 0xF: // SY + case 0xE: // ST + case 0xB: // ISH + case 0xA: // ISHST + case 0x7: // NSH + case 0x6: // NSHST + case 0x3: // OSH + case 0x2: // OSHST + break; } - return decodeThumbInstruction(insn); + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; } -// -// Public interface for the disassembler -// +static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (!Val) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} -bool ARMDisassembler::getInstruction(MCInst &MI, - uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os) const { - // The machine instruction. 
- uint32_t insn; - uint8_t bytes[4]; +static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) - return false; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); - // Encoded as a small-endian 32-bit word in the stream. - insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); - - unsigned Opcode = decodeARMInstruction(insn); - ARMFormat Format = ARMFormats[Opcode]; - Size = 4; - - DEBUG({ - errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode) - << " Format=" << stringForARMFormat(Format) << '(' << (int)Format - << ")\n"; - showBitVector(errs(), insn); - }); - - OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); - if (!Builder) - return false; + if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; - Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), - getDisInfoBlock(), getMCContext(), - Address); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; - if (!Builder->Build(MI, insn)) - return false; + return S; +} - return true; + +static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder){ + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; + if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -bool ThumbDisassembler::getInstruction(MCInst &MI, - uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os) const { - // The Thumb instruction stream is a sequence of halfwords. - - // This represents the first halfword as well as the machine instruction - // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top - // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to - // the top half followed by the second halfword. - unsigned insn = 0; - // Possible second halfword. 
- uint16_t insn1 = 0; - - // A6.1 Thumb instruction set encoding - // - // If bits [15:11] of the halfword being decoded take any of the following - // values, the halfword is the first halfword of a 32-bit instruction: - // o 0b11101 - // o 0b11110 - // o 0b11111. - // - // Otherwise, the halfword is a 16-bit instruction. - - // Read 2 bytes of data first. - uint8_t bytes[2]; - if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) - return false; +static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} - // Encoded as a small-endian 16-bit halfword in the stream. - insn = (bytes[1] << 8) | bytes[0]; - unsigned bits15_11 = slice(insn, 15, 11); - bool IsThumb2 = false; +static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + if (Rm == 0xF) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} - // 32-bit instructions if the bits [15:11] of the halfword matches - // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }. - if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) { - IsThumb2 = true; - if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1) - return false; - // Encoded as a small-endian 16-bit halfword in the stream. - insn1 = (bytes[1] << 8) | bytes[0]; - insn = (insn << 16 | insn1); - } - - // The insn could potentially be bit-twiddled in order to be decoded as an ARM - // NEON/VFP opcode. In such case, the modified insn is later disassembled as - // an ARM NEON/VFP instruction. - // - // This is a short term solution for lack of encoding bits specified for the - // Thumb2 NEON/VFP instructions. The long term solution could be adding some - // infrastructure to have each instruction support more than one encodings. 
- // Which encoding is used would be based on which subtarget the compiler/ - // disassembler is working with at the time. This would allow the sharing of - // the NEON patterns between ARM and Thumb2, as well as potential greater - // sharing between the regular ARM instructions and the 32-bit wide Thumb2 - // instructions as well. - unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn); - - ARMFormat Format = ARMFormats[Opcode]; - Size = IsThumb2 ? 4 : 2; - - DEBUG({ - errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) - << " Format=" << stringForARMFormat(Format) << '(' << (int)Format - << ")\n"; - showBitVector(errs(), insn); - }); - - OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); - if (!Builder) - return false; - Builder->SetSession(const_cast<Session *>(&SO)); +static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), - getDisInfoBlock(), getMCContext(), - Address); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); - if (!Builder->Build(MI, insn)) - return false; + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; - return true; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -// A8.6.50 -// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. -static unsigned short CountITSize(unsigned ITMask) { - // First count the trailing zeros of the IT mask. 
- unsigned TZ = CountTrailingZeros_32(ITMask); - if (TZ > 3) { - DEBUG(errs() << "Encoding error: IT Mask '0000'"); - return 0; +static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 6, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 2) != 0) + align = 4; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } - return (4 - TZ); + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -/// Init ITState. Note that at least one bit is always 1 in mask. 
-bool Session::InitIT(unsigned short bits7_0) { - ITCounter = CountITSize(slice(bits7_0, 3, 0)); - if (ITCounter == 0) - return false; +static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - // A8.6.50 IT - unsigned short FirstCond = slice(bits7_0, 7, 4); - if (FirstCond == 0xF) { - DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); - return false; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 6, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 2) != 0) + align = 4; } - if (FirstCond == 0xE && ITCounter != 1) { - DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); - return false; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } - ITState = bits7_0; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); - return true; + return S; } -/// Update ITState if necessary. 
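For reference, the IT-block bookkeeping being retired here (Session::InitIT/UpdateIT together with CountITSize above) boils down to two small operations on the 8-bit IT state, firstcond in bits 7-4 and the mask in bits 3-0. A standalone restatement of that logic (illustrative names, same behaviour as the removed code):

// Block length = 4 minus the number of trailing zeros in the 4-bit mask;
// an all-zero mask is an encoding error.
static unsigned itBlockLength(unsigned Mask4) {
  unsigned TZ = 0;
  while (TZ < 4 && ((Mask4 >> TZ) & 1) == 0)
    ++TZ;
  return TZ > 3 ? 0 : 4 - TZ;
}

// After each instruction in the block, the low five state bits shift left
// by one, feeding the next condition bit into position.
static void advanceITState(unsigned &ITState) {
  unsigned NewLow5 = (ITState & 0x1F) << 1;
  ITState = (ITState & ~0x1Fu) | (NewLow5 & 0x1F);
}

The replacement decoder, DecodeIT further down in this hunk, only extracts and sanity-checks the pred/mask operands; it does not track block progress itself.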
-void Session::UpdateIT() { - assert(ITCounter); - --ITCounter; - if (ITCounter == 0) - ITState = 0; - else { - unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1; - setSlice(ITState, 4, 0, NewITState4_0); + +static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + index = fieldFromInstruction32(Insn, 5, 3); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 1: + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 1) != 0) + align = 8; + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -static MCDisassembler *createARMDisassembler(const Target &T) { - return new ARMDisassembler; +static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + index = fieldFromInstruction32(Insn, 5, 3); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 1: + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 1) != 0) + align = 8; + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -static MCDisassembler *createThumbDisassembler(const Target &T) { - return new ThumbDisassembler; + +static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -extern "C" void LLVMInitializeARMDisassembler() { - // Register the disassembler. 
- TargetRegistry::RegisterMCDisassembler(TheARMTarget, - createARMDisassembler); - TargetRegistry::RegisterMCDisassembler(TheThumbTarget, - createThumbDisassembler); +static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -EDInstInfo *ARMDisassembler::getEDInfo() const { - return instInfoARM; + +static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 8; + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + align = 4 << fieldFromInstruction32(Insn, 4, 2); + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return 
MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -EDInstInfo *ThumbDisassembler::getEDInfo() const { - return instInfoARM; +static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 8; + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + align = 4 << fieldFromInstruction32(Insn, 4, 2); + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; +} + +static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 
4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + + if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + + if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned pred = fieldFromInstruction16(Insn, 4, 4); + // The InstPrinter needs to have the low bit of the predicate in + // the mask operand to be able to print it properly. + unsigned mask = fieldFromInstruction16(Insn, 0, 5); + + if (pred == 0xF) { + pred = 0xE; + S = MCDisassembler::SoftFail; + } + + if ((mask & 0xF) == 0) { + // Preserve the high bit of the mask, which is the low bit of + // the predicate. 
+ mask &= 0x10; + mask |= 0x8; + S = MCDisassembler::SoftFail; + } + + Inst.addOperand(MCOperand::CreateImm(pred)); + Inst.addOperand(MCOperand::CreateImm(mask)); + return S; } + +static DecodeStatus +DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + bool writeback = (W == 1) | (P == 0); + + addr |= (U << 8) | (Rn << 9); + + if (writeback && (Rn == Rt || Rn == Rt2)) + Check(S, MCDisassembler::SoftFail); + if (Rt == Rt2) + Check(S, MCDisassembler::SoftFail); + + // Rt + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + // Rt2 + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + // Writeback operand + if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + // addr + if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus +DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + bool writeback = (W == 1) | (P == 0); + + addr |= (U << 8) | (Rn << 9); + + if (writeback && (Rn == Rt || Rn == Rt2)) + Check(S, MCDisassembler::SoftFail); + + // Writeback operand + if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + // Rt + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + // Rt2 + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + // addr + if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, + uint64_t Address, const void *Decoder) { + unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); + if (sign1 != sign2) return MCDisassembler::Fail; + + unsigned Val = fieldFromInstruction32(Insn, 0, 8); + Val |= fieldFromInstruction32(Insn, 12, 3) << 8; + Val |= fieldFromInstruction32(Insn, 26, 1) << 11; + Val |= sign1 << 12; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val))); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, + uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + // Shift of "asr #32" is not allowed in Thumb2 mode. 
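Every decoder added in this hunk threads a DecodeStatus S through Check(...): Success leaves the running status alone, SoftFail (unpredictable but still decodable) is recorded and decoding continues, and Fail stops the decoder immediately. Check itself is defined elsewhere in this file; a plausible sketch of the idiom, using the same DecodeStatus values the functions above use (not necessarily the in-tree body):

static bool Check(DecodeStatus &Out, DecodeStatus In) {
  switch (In) {
  case MCDisassembler::Success:
    return true;                        // keep whatever status we already have
  case MCDisassembler::SoftFail:
    Out = MCDisassembler::SoftFail;     // remember the downgrade, keep decoding
    return true;
  case MCDisassembler::Fail:
    Out = MCDisassembler::Fail;         // hard failure, caller bails out
    return false;
  }
  return false;
}

This is why, for example, DecodeLDRPreImm can mark Rn == Rt as SoftFail yet keep building operands, while an illegal register field such as an odd Rt in DecodeDoubleRegLoad makes the whole decode return Fail.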
+ if (Val == 0x20) S = MCDisassembler::SoftFail; + Inst.addOperand(MCOperand::CreateImm(Val)); + return S; +} + diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h deleted file mode 100644 index 0a74a38..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// It contains the header for ARMDisassembler and ThumbDisassembler, both are -// subclasses of MCDisassembler. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMDISASSEMBLER_H -#define ARMDISASSEMBLER_H - -#include "llvm/MC/MCDisassembler.h" - -namespace llvm { - -class MCInst; -class MemoryObject; -class raw_ostream; - -struct EDInstInfo; - -/// ARMDisassembler - ARM disassembler for all ARM platforms. -class ARMDisassembler : public MCDisassembler { -public: - /// Constructor - Initializes the disassembler. - /// - ARMDisassembler() : - MCDisassembler() { - } - - ~ARMDisassembler() { - } - - /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const; - - /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; -private: -}; - -// Forward declaration. -class ARMBasicMCBuilder; - -/// Session - Keep track of the IT Block progression. -class Session { - friend class ARMBasicMCBuilder; -public: - Session() : ITCounter(0), ITState(0) {} - ~Session() {} - /// InitIT - Initializes ITCounter/ITState. - bool InitIT(unsigned short bits7_0); - /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. - void UpdateIT(); - -private: - unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4. - unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially. -}; - -/// ThumbDisassembler - Thumb disassembler for all ARM platforms. -class ThumbDisassembler : public MCDisassembler { -public: - /// Constructor - Initializes the disassembler. - /// - ThumbDisassembler() : - MCDisassembler(), SO() { - } - - ~ThumbDisassembler() { - } - - /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const; - - /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; -private: - Session SO; -}; - -} // namespace llvm - -#endif diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp deleted file mode 100644 index d89c80a..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ /dev/null @@ -1,3818 +0,0 @@ -//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. 
-// It contains code to represent the core concepts of Builder and DisassembleFP -// to solve the problem of disassembling an ARM instr. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-disassembler" - -#include "ARMDisassemblerCore.h" -#include "ARMAddressingModes.h" -#include "ARMMCExpr.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -//#define DEBUG(X) do { X; } while (0) - -/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const -/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the -/// operand info for each ARMInsts[i]. -/// -/// Together with an instruction's encoding format, we can take advantage of the -/// NumOperands and the OpInfo fields of the target instruction description in -/// the quest to build out the MCOperand list for an MCInst. -/// -/// The general guideline is that with a known format, the number of dst and src -/// operands are well-known. The dst is built first, followed by the src -/// operand(s). The operands not yet used at this point are for the Implicit -/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is -/// defined with two components: -/// -/// def pred { // Operand PredicateOperand -/// ValueType Type = OtherVT; -/// string PrintMethod = "printPredicateOperand"; -/// string AsmOperandLowerMethod = ?; -/// dag MIOperandInfo = (ops i32imm, CCR); -/// AsmOperandClass ParserMatchClass = ImmAsmOperand; -/// dag DefaultOps = (ops (i32 14), (i32 zero_reg)); -/// } -/// -/// which is manifested by the MCOperandInfo[] of: -/// -/// { 0, 0|(1<<MCOI::Predicate), 0 }, -/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 } -/// -/// So the first predicate MCOperand corresponds to the immediate part of the -/// ARM condition field (Inst{31-28}), and the second predicate MCOperand -/// corresponds to a register kind of ARM::CPSR. -/// -/// For the Defs part, in the simple case of only cc_out:$s, we have: -/// -/// def cc_out { // Operand OptionalDefOperand -/// ValueType Type = OtherVT; -/// string PrintMethod = "printSBitModifierOperand"; -/// string AsmOperandLowerMethod = ?; -/// dag MIOperandInfo = (ops CCR); -/// AsmOperandClass ParserMatchClass = ImmAsmOperand; -/// dag DefaultOps = (ops (i32 zero_reg)); -/// } -/// -/// which is manifested by the one MCOperandInfo of: -/// -/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 } -/// - -namespace llvm { -extern MCInstrDesc ARMInsts[]; -} - -using namespace llvm; - -const char *ARMUtils::OpcodeName(unsigned Opcode) { - return ARMInsts[Opcode].Name; -} - -// Return the register enum Based on RegClass and the raw register number. -// FIXME: Auto-gened? -static unsigned -getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) { - if (RegClassID == ARM::rGPRRegClassID) { - // Check for The register numbers 13 and 15 that are not permitted for many - // Thumb register specifiers. - if (RawRegister == 13 || RawRegister == 15) { - B->SetErr(-1); - return 0; - } - // For this purpose, we can treat rGPR as if it were GPR. - RegClassID = ARM::GPRRegClassID; - } - - // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). - // A7.3 register encoding - // Qd -> bit[12] == 0 - // Qn -> bit[16] == 0 - // Qm -> bit[0] == 0 - // - // If one of these bits is 1, the instruction is UNDEFINED. - if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) { - B->SetErr(-1); - return 0; - } - unsigned RegNum = - RegClassID == ARM::QPRRegClassID ? 
RawRegister >> 1 : RawRegister; - - switch (RegNum) { - default: - break; - case 0: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D0; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q0; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0; - } - break; - case 1: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D1; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q1; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1; - } - break; - case 2: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D2; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q2; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2; - } - break; - case 3: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D3; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q3; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3; - } - break; - case 4: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D4; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4; - } - break; - case 5: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D5; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5; - } - break; - case 6: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D6; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6; - } - break; - case 7: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D7; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7; - } - break; - case 8: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R8; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8; - case ARM::QPRRegClassID: return ARM::Q8; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8; - } - break; - case 9: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R9; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9; - case ARM::QPRRegClassID: 
return ARM::Q9; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9; - } - break; - case 10: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R10; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10; - case ARM::QPRRegClassID: return ARM::Q10; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10; - } - break; - case 11: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R11; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11; - case ARM::QPRRegClassID: return ARM::Q11; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11; - } - break; - case 12: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R12; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12; - case ARM::QPRRegClassID: return ARM::Q12; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12; - } - break; - case 13: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::SP; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13; - case ARM::QPRRegClassID: return ARM::Q13; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13; - } - break; - case 14: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::LR; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14; - case ARM::QPRRegClassID: return ARM::Q14; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14; - } - break; - case 15: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::PC; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15; - case ARM::QPRRegClassID: return ARM::Q15; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15; - } - break; - case 16: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D16; - case ARM::SPRRegClassID: return ARM::S16; - } - break; - case 17: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D17; - case ARM::SPRRegClassID: return ARM::S17; - } - break; - case 18: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D18; - case ARM::SPRRegClassID: return ARM::S18; - } - break; - case 19: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D19; - case ARM::SPRRegClassID: return ARM::S19; - } - break; - case 20: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D20; - case ARM::SPRRegClassID: return ARM::S20; - } - break; - case 21: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D21; - case ARM::SPRRegClassID: return ARM::S21; - } - break; - case 22: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D22; - case ARM::SPRRegClassID: return ARM::S22; - } - break; - case 23: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D23; - case ARM::SPRRegClassID: return ARM::S23; - } - break; - case 24: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D24; - case ARM::SPRRegClassID: return ARM::S24; - } - break; - case 25: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D25; - case ARM::SPRRegClassID: return ARM::S25; - } - break; - case 26: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D26; - case ARM::SPRRegClassID: return ARM::S26; - } - break; - case 27: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D27; - case ARM::SPRRegClassID: return ARM::S27; - } - break; - case 28: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D28; - case ARM::SPRRegClassID: return ARM::S28; - } - break; - 
case 29: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D29; - case ARM::SPRRegClassID: return ARM::S29; - } - break; - case 30: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D30; - case ARM::SPRRegClassID: return ARM::S30; - } - break; - case 31: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D31; - case ARM::SPRRegClassID: return ARM::S31; - } - break; - } - DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); - // Encoding error. Mark the builder with error code != 0. - B->SetErr(-1); - return 0; -} - -/////////////////////////////// -// // -// Utility Functions // -// // -/////////////////////////////// - -// Extract/Decode Rd: Inst{15-12}. -static inline unsigned decodeRd(uint32_t insn) { - return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask; -} - -// Extract/Decode Rn: Inst{19-16}. -static inline unsigned decodeRn(uint32_t insn) { - return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask; -} - -// Extract/Decode Rm: Inst{3-0}. -static inline unsigned decodeRm(uint32_t insn) { - return (insn & ARMII::GPRRegMask); -} - -// Extract/Decode Rs: Inst{11-8}. -static inline unsigned decodeRs(uint32_t insn) { - return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask; -} - -static inline unsigned getCondField(uint32_t insn) { - return (insn >> ARMII::CondShift); -} - -static inline unsigned getIBit(uint32_t insn) { - return (insn >> ARMII::I_BitShift) & 1; -} - -static inline unsigned getAM3IBit(uint32_t insn) { - return (insn >> ARMII::AM3_I_BitShift) & 1; -} - -static inline unsigned getPBit(uint32_t insn) { - return (insn >> ARMII::P_BitShift) & 1; -} - -static inline unsigned getUBit(uint32_t insn) { - return (insn >> ARMII::U_BitShift) & 1; -} - -static inline unsigned getPUBits(uint32_t insn) { - return (insn >> ARMII::U_BitShift) & 3; -} - -static inline unsigned getSBit(uint32_t insn) { - return (insn >> ARMII::S_BitShift) & 1; -} - -static inline unsigned getWBit(uint32_t insn) { - return (insn >> ARMII::W_BitShift) & 1; -} - -static inline unsigned getDBit(uint32_t insn) { - return (insn >> ARMII::D_BitShift) & 1; -} - -static inline unsigned getNBit(uint32_t insn) { - return (insn >> ARMII::N_BitShift) & 1; -} - -static inline unsigned getMBit(uint32_t insn) { - return (insn >> ARMII::M_BitShift) & 1; -} - -// See A8.4 Shifts applied to a register. -// A8.4.2 Register controlled shifts. -// -// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits -// into llvm enums for shift opcode. The API clients should pass in the value -// encoded with two bits, so the assert stays to signal a wrong API usage. -// -// A8-12: DecodeRegShift() -static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { - switch (bits) { - default: assert(0 && "No such value"); return ARM_AM::no_shift; - case 0: return ARM_AM::lsl; - case 1: return ARM_AM::lsr; - case 2: return ARM_AM::asr; - case 3: return ARM_AM::ror; - } -} - -// See A8.4 Shifts applied to a register. -// A8.4.1 Constant shifts. -// -// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5 -// encodings into the intended ShiftOpc and shift amount. 
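Concretely, the constant-shift rule that the function below implements treats a zero immediate as a special case rather than a zero-amount shift:

//   raw (lsl, imm5 == 0)  ->  no shift
//   raw (lsr, imm5 == 0)  ->  lsr #32
//   raw (asr, imm5 == 0)  ->  asr #32
//   raw (ror, imm5 == 0)  ->  rrx
//   any nonzero imm5      ->  opcode and amount pass through unchanged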
-// -// A8-11: DecodeImmShift() -static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { - if (ShImm != 0) - return; - switch (ShOp) { - case ARM_AM::no_shift: - case ARM_AM::rrx: - break; - case ARM_AM::lsl: - ShOp = ARM_AM::no_shift; - break; - case ARM_AM::lsr: - case ARM_AM::asr: - ShImm = 32; - break; - case ARM_AM::ror: - ShOp = ARM_AM::rrx; - break; - } -} - -// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding -// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. The API -// clients should pass in the value encoded with two bits, so the assert stays -// to signal a wrong API usage. -static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { - switch (bits) { - default: assert(0 && "No such value"); return ARM_AM::bad_am_submode; - case 1: return ARM_AM::ia; // P=0 U=1 - case 3: return ARM_AM::ib; // P=1 U=1 - case 0: return ARM_AM::da; // P=0 U=0 - case 2: return ARM_AM::db; // P=1 U=0 - } -} - -//////////////////////////////////////////// -// // -// Disassemble function definitions // -// // -//////////////////////////////////////////// - -/// There is a separate Disassemble*Frm function entry for disassembly of an ARM -/// instr into a list of MCOperands in the appropriate order, with possible dst, -/// followed by possible src(s). -/// -/// The processing of the predicate, and the 'S' modifier bit, if MI modifies -/// the CPSR, is factored into ARMBasicMCBuilder's method named -/// TryPredicateAndSBitModifier. - -static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - assert(0 && "Unexpected pseudo instruction!"); - return false; -} - -// A8.6.94 MLA -// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE; -// -// A8.6.105 MUL -// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; -// -// A8.6.246 UMULL -// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE; -// if dHi == dLo then UNPREDICTABLE; -static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) { - unsigned R19_16 = slice(insn, 19, 16); - unsigned R15_12 = slice(insn, 15, 12); - unsigned R11_8 = slice(insn, 11, 8); - unsigned R3_0 = slice(insn, 3, 0); - switch (Opcode) { - default: - // Did we miss an opcode? - DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!"); - return false; - case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT: - case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT: - case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR: - case ARM::USADA8: - if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - return false; - case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR: - case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT: - case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX: - // A8.6.167 SMLAD & A8.6.172 SMLSD - case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX: - case ARM::USAD8: - if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - return false; - case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL: - case ARM::UMULL: - case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT: - case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX: - if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - if (R19_16 == R15_12) - return true; - return false;; - } -} - -// Multiply Instructions. 
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR, -// SMMLS, SMMLAR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience): -// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} -// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is -// only for {d, n, m}. -// -// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD, -// SMUADX, and USAD8 (for convenience): -// Rd{19-16} Rn{3-0} Rm{11-8} -// -// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT, -// SMLALD, SMLADLX, SMLSLD, SMLSLDX: -// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8} -// -// The mapping of the multiply registers to the "regular" ARM registers, where -// there are convenience decoder functions, is: -// -// Inst{15-12} => Rd -// Inst{19-16} => Rn -// Inst{3-0} => Rm -// Inst{11-8} => Rs -static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm"); - assert(NumOps >= 3 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == ARM::GPRRegClassID - && "Expect three register operands"); - - // Sanity check for the register encodings. - if (BadRegsMulFrm(Opcode, insn)) - return false; - - // Instructions with two destination registers have RdLo{15-12} first. - if (NumDefs == 2) { - assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && - "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - // The two src regsiters: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); - OpIdx += 3; - - // Many multiply instructions (e.g., MLA) have three src registers. - // The third register operand is Ra{15-12}. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - return true; -} - -// Helper routines for disassembly of coprocessor instructions. - -static bool LdStCopOpcode(unsigned Opcode) { - if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) || - (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE)) - return true; - return false; -} -static bool CoprocessorOpcode(unsigned Opcode) { - if (LdStCopOpcode(Opcode)) - return true; - - switch (Opcode) { - default: - return false; - case ARM::CDP: case ARM::CDP2: - case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2: - case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2: - return true; - } -} -static inline unsigned GetCoprocessor(uint32_t insn) { - return slice(insn, 11, 8); -} -static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) { - return CDP ? 
slice(insn, 23, 20) : slice(insn, 23, 21); -} -static inline unsigned GetCopOpc2(uint32_t insn) { - return slice(insn, 7, 5); -} -static inline unsigned GetCopOpc(uint32_t insn) { - return slice(insn, 7, 4); -} -// Most of the operands are in immediate forms, except Rd and Rn, which are ARM -// core registers. -// -// CDP, CDP2: cop opc1 CRd CRn CRm opc2 -// -// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2 -// -// MCRR, MCRR2, MRRC, MRRc2: cop opc Rd Rn CRm -// -// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00 -// and friends -// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00 -// and friends -// <-- addrmode2 --> -// -// LDC_OPTION: cop CRd Rn imm8 -// and friends -// STC_OPTION: cop CRd Rn imm8 -// and friends -// -static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr"); - - unsigned &OpIdx = NumOpsAdded; - // A8.6.92 - // if coproc == '101x' then SEE "Advanced SIMD and VFP" - // But since the special instructions have more explicit encoding bits - // specified, if coproc == 10 or 11, we should reject it as invalid. - unsigned coproc = GetCoprocessor(insn); - if ((Opcode == ARM::MCR || Opcode == ARM::MCRR || - Opcode == ARM::MRC || Opcode == ARM::MRRC) && - (coproc == 10 || coproc == 11)) { - DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n"); - return false; - } - - bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 || - Opcode == ARM::MRRC || Opcode == ARM::MRRC2); - - // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}). - bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2); - bool LdStCop = LdStCopOpcode(Opcode); - bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2); - - OpIdx = 0; - - if (RtOut) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - MI.addOperand(MCOperand::CreateImm(coproc)); - ++OpIdx; - - if (LdStCop) { - // Unindex if P:W = 0b00 --> _OPTION variant - unsigned PW = getPBit(insn) << 1 | getWBit(insn); - - MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - OpIdx += 2; - - if (PW) { - MI.addOperand(MCOperand::CreateReg(0)); - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, - ARM_AM::no_shift, IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - OpIdx += 2; - } else { - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0))); - ++OpIdx; - } - } else { - MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn) - : GetCopOpc1(insn, NoGPR))); - ++OpIdx; - - if (!RtOut) { - MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) - : MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - MI.addOperand(OneCopOpc ? MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn))) - : MCOperand::CreateImm(decodeRn(insn))); - - MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); - - OpIdx += 2; - - if (!OneCopOpc) { - MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); - ++OpIdx; - } - } - - return true; -} - -// Branch Instructions. 
-// BL: SignExtend(Imm24:'00', 32) -// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 -// SMC: ZeroExtend(imm4, 32) -// SVC: ZeroExtend(Imm24, 32) -// -// Various coprocessor instructions are assigned BrFrm arbitrarily. -// Delegates to DisassembleCoprocessor() helper function. -// -// MRS/MRSsys: Rd -// MSR/MSRsys: Rm mask=Inst{19-16} -// BXJ: Rm -// MSRi/MSRsysi: so_imm -// SRSW/SRS: ldstm_mode:$amode mode_imm -// RFEW/RFE: ldstm_mode:$amode Rn -static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - // MRS and MRSsys take one GPR reg Rd. - if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { - assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - NumOpsAdded = 1; - return true; - } - // BXJ takes one GPR reg Rm. - if (Opcode == ARM::BXJ) { - assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 1; - return true; - } - // MSR take a mask, followed by one GPR reg Rm. The mask contains the R Bit in - // bit 4, and the special register fields in bits 3-0. - if (Opcode == ARM::MSR) { - assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | - slice(insn, 19, 16) /* Special Reg */ )); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 2; - return true; - } - // MSRi take a mask, followed by one so_imm operand. The mask contains the - // R Bit in bit 4, and the special register fields in bits 3-0. - if (Opcode == ARM::MSRi) { - // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate) - // The hints instructions have more specific encodings, so if mask == 0, - // we should reject this as an invalid instruction. - if (slice(insn, 19, 16) == 0) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | - slice(insn, 19, 16) /* Special Reg */ )); - // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. - // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. - // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). 
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; - unsigned Imm = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::SRSW || Opcode == ARM::SRS || - Opcode == ARM::RFEW || Opcode == ARM::RFE) { - ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - - if (Opcode == ARM::SRSW || Opcode == ARM::SRS) - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - NumOpsAdded = 3; - return true; - } - - assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred - || Opcode == ARM::SMC || Opcode == ARM::SVC) && - "Unexpected Opcode"); - - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); - - int Imm32 = 0; - if (Opcode == ARM::SMC) { - // ZeroExtend(imm4, 32) where imm24 = Inst{3-0}. - Imm32 = slice(insn, 3, 0); - } else if (Opcode == ARM::SVC) { - // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}. - Imm32 = slice(insn, 23, 0); - } else { - // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}. - unsigned Imm26 = slice(insn, 23, 0) << 2; - //Imm32 = signextend<signed int, 26>(Imm26); - Imm32 = SignExtend32<26>(Imm26); - } - - MI.addOperand(MCOperand::CreateImm(Imm32)); - NumOpsAdded = 1; - - return true; -} - -// Misc. Branch Instructions. -// BX_RET, MOVPCLR -// BLX, BLX_pred, BX, BX_pred -// BLXi -static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // BX_RET and MOVPCLR have only two predicate operands; do an early return. - if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR) - return true; - - // BLX and BX take one GPR reg. - if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred || - Opcode == ARM::BX || Opcode == ARM::BX_pred) { - assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - OpIdx = 1; - return true; - } - - // BLXi takes imm32 (the PC offset). - if (Opcode == ARM::BLXi) { - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); - // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}. - unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1; - int Imm32 = SignExtend32<26>(Imm26); - MI.addOperand(MCOperand::CreateImm(Imm32)); - OpIdx = 1; - return true; - } - - return false; -} - -static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { - uint32_t lsb = slice(insn, 11, 7); - uint32_t msb = slice(insn, 20, 16); - uint32_t Val = 0; - if (msb < lsb) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - - for (uint32_t i = lsb; i <= msb; ++i) - Val |= (1 << i); - mask = ~Val; - return true; -} - -// Standard data-processing instructions allow PC as a register specifier, -// but we should reject other DPFrm instructions with PC as registers. -static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - default: - // Did we miss an opcode? 
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) { - DEBUG(errs() << "DPFrm with bad reg specifier(s)\n"); - return true; - } - case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr: - case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr: - case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr: - case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr: - return false; - } -} - -// A major complication is the fact that some of the saturating add/subtract -// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. -// They are QADD, QDADD, QDSUB, and QSUB. -static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - bool isUnary = isUnaryDP(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Disassemble register def if there is one. - if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // Now disassemble the src operands. - if (OpIdx >= NumOps) - return false; - - // Special-case handling of BFC/BFI/SBFX/UBFX. - if (Opcode == ARM::BFC || Opcode == ARM::BFI) { - // A8.6.17 BFC & A8.6.18 BFI - // Sanity check Rd. - if (decodeRd(insn) == 15) - return false; - MI.addOperand(MCOperand::CreateReg(0)); - if (Opcode == ARM::BFI) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - } - uint32_t mask = 0; - if (!getBFCInvMask(insn, mask)) - return false; - - MI.addOperand(MCOperand::CreateImm(mask)); - OpIdx += 2; - return true; - } - if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - // Sanity check Rd and Rm. - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); - MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); - OpIdx += 3; - return true; - } - - bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD || - Opcode == ARM::QDSUB || Opcode == ARM::QSUB); - - // BinaryDP has an Rn operand. - if (!isUnary) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - RmRn ? decodeRm(insn) : decodeRn(insn)))); - ++OpIdx; - } - - // If this is a two-address operand, skip it, e.g., MOVCCr operand 1. - if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Now disassemble operand 2. - if (OpIdx >= NumOps) - return false; - - if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - // We have a reg/reg form. - // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are - // routed here as well. - // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); - if (BadRegsDPFrm(Opcode, insn)) - return false; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - RmRn? decodeRn(insn) : decodeRm(insn)))); - ++OpIdx; - } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { - // These two instructions don't allow d as 15. 
- if (decodeRd(insn) == 15) - return false; - // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). - assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); - unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) - MI.addOperand(MCOperand::CreateImm(Imm16)); - ++OpIdx; - } else { - // We have a reg/imm form. - // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. - // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. - // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). - assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); - unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; - unsigned Imm = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); - ++OpIdx; - } - - return true; -} - -static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - bool isUnary = isUnaryDP(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Disassemble register def if there is one. - if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // Disassemble the src operands. - if (OpIdx >= NumOps) - return false; - - // BinaryDP has an Rn operand. - if (!isUnary) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // If this is a two-address operand, skip it, e.g., MOVCCs operand 1. - if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Disassemble operand 2, which consists of three components. - if (OpIdx + 2 >= NumOps) - return false; - - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+2].RegClass < 0) && - "Expect 3 reg operands"); - - // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. - unsigned Rs = slice(insn, 4, 4); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - if (Rs) { - // If Inst{7} != 0, we should reject this insn as an invalid encoding. - if (slice(insn, 7, 7)) - return false; - - // A8.6.3 ADC (register-shifted register) - // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE; - // - // This also accounts for shift instructions (register) where, fortunately, - // Inst{19-16} = 0b0000. - // A8.6.89 LSL (register) - // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; - if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || - decodeRm(insn) == 15 || decodeRs(insn) == 15) - return false; - - // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0))); - } else { - // Constant shifts: [Rm, reg0, shift_imm]. - MI.addOperand(MCOperand::CreateReg(0)); // NoRegister - // Inst{6-5} encodes the shift opcode. 
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... - getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm))); - } - OpIdx += 3; - - return true; -} - -static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack, - bool Imm) { - const StringRef Name = ARMInsts[Opcode].Name; - unsigned Rt = decodeRd(insn); - unsigned Rn = decodeRn(insn); - unsigned Rm = decodeRm(insn); - unsigned P = getPBit(insn); - unsigned W = getWBit(insn); - - if (Store) { - // Only STR (immediate, register) allows PC as the source. - if (Name.startswith("STRB") && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - if (WBack && (Rn == 15 || Rn == Rt)) { - DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); - return true; - } - if (!Imm && Rm == 15) { - DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); - return true; - } - } else { - // Only LDR (immediate, register) allows PC as the destination. - if (Name.startswith("LDRB") && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - if (Imm) { - // Immediate - if (Rn == 15) { - // The literal form must be in offset mode; it's an encoding error - // otherwise. - if (!(P == 1 && W == 0)) { - DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n"); - return true; - } - // LDRB (literal) does not allow PC as the destination. - if (Opcode != ARM::LDRi12 && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - } else { - // Write back while Rn == Rt does not make sense. - if (WBack && (Rn == Rt)) { - DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); - return true; - } - } - } else { - // Register - if (Rm == 15) { - DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); - return true; - } - if (WBack && (Rn == 15 || Rn == Rt)) { - DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); - return true; - } - } - } - return false; -} - -static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(((!isStore && MCID.getNumDefs() > 0) || - (isStore && (MCID.getNumDefs() == 0 || isPrePost))) - && "Invalid arguments"); - - // Operand 0 of a pre- and post-indexed store is the address base writeback. - if (isPrePost && isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the dst/src operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // After dst of a pre- and post-indexed load is the address base writeback. 
- if (isPrePost && !isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the base operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) - && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - // For reg/reg form, base reg is followed by +/- reg shop imm. - // For immediate form, it is followed by +/- imm12. - // See also ARMAddressingModes.h (Addressing Mode #2). - if (OpIdx + 1 >= NumOps) - return false; - - if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0)) - return false; - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - if (getIBit(insn) == 0) { - // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). - // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already - // been populated. - if (isPrePost) { - MI.addOperand(MCOperand::CreateReg(0)); - OpIdx += 1; - } - - unsigned Imm12 = slice(insn, 11, 0); - if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 || - Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) { - // Disassemble the 12-bit immediate offset, which is the second operand in - // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm). - int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12; - MI.addOperand(MCOperand::CreateImm(Offset)); - } else { - // Disassemble the 12-bit immediate offset, which is the second operand in - // $am2offset => (ops GPR, i32imm). - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift, - IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } - OpIdx += 1; - } else { - // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid. - if (slice(insn,4,4) == 1) - return false; - - // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... 
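// Illustrative sketch (added commentary; not part of the removed file) of the
// A8.4.1 special cases referred to above: an encoded shift amount of zero
// means a 32-bit shift for LSR/ASR and turns ROR into RRX -- roughly the
// adjustment the surrounding code delegates to getImmShiftSE().
static void applyA841Rule(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
  if (ShImm != 0)
    return;
  if (ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr)
    ShImm = 32;
  else if (ShOp == ARM_AM::ror)
    ShOp = ARM_AM::rrx;
}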
- getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode))); - OpIdx += 2; - } - - return true; -} - -static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); -} - -static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); -} - -static bool HasDualReg(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: - return true; - } -} - -static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(((!isStore && MCID.getNumDefs() > 0) || - (isStore && (MCID.getNumDefs() == 0 || isPrePost))) - && "Invalid arguments"); - - // Operand 0 of a pre- and post-indexed store is the address base writeback. - if (isPrePost && isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the dst/src operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // Fill in LDRD and STRD's second operand Rt operand. - if (HasDualReg(Opcode)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn) + 1))); - ++OpIdx; - } - - // After dst of a pre- and post-indexed load is the address base writeback. - if (isPrePost && !isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the base operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) - && "Offset mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - // For reg/reg form, base reg is followed by +/- reg. - // For immediate form, it is followed by +/- imm8. - // See also ARMAddressingModes.h (Addressing Mode #3). - if (OpIdx + 1 >= NumOps) - return false; - - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - if (getAM3IBit(insn) == 1) { - MI.addOperand(MCOperand::CreateReg(0)); - - // Disassemble the 8-bit immediate offset. 
- unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; - unsigned Imm4L = insn & 0xF; - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L, - IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } else { - // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } - OpIdx += 2; - - return true; -} - -static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, - B); -} - -static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); -} - -// The algorithm for disassembly of LdStMulFrm is different from others because -// it explicitly populates the two predicate operands after the base register. -// After that, we need to populate the reglist with each affected register -// encoded as an MCOperand. -static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4"); - NumOpsAdded = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base, if necessary. - if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD || - Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD || - Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD || - Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - } - - // Add the base register operand. - MI.addOperand(MCOperand::CreateReg(Base)); - - // Handling the two predicate operands before the reglist. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondVal)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - - NumOpsAdded += 3; - - // Fill the variadic part of reglist. - unsigned RegListBits = insn & ((1 << 16) - 1); - for (unsigned i = 0; i < 16; ++i) { - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - i))); - ++NumOpsAdded; - } - } - - return true; -} - -// LDREX, LDREXB, LDREXH: Rd Rn -// LDREXD: Rd Rd+1 Rn -// STREX, STREXB, STREXH: Rd Rm Rn -// STREXD: Rd Rm Rm+1 Rn -// -// SWP, SWPB: Rd Rm Rn -static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && "Expect 2 reg operands"); - - bool isStore = slice(insn, 20, 20) == 0; - bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // Store register Exclusive needs a source operand. 
- if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - - if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)+1))); - ++OpIdx; - } - } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)+1))); - ++OpIdx; - } - - // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - return true; -} - -// Misc. Arithmetic Instructions. -// CLZ: Rd Rm -// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 -// RBIT, REV, REV16, REVSH: Rd Rm -static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && "Expect 2 reg operands"); - - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - - // Sanity check the registers, which should not be 15. - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - if (ThreeReg && decodeRn(insn) == 15) - return false; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - if (ThreeReg) { - assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - - // If there is still an operand info left which is an immediate operand, add - // an additional imm5 LSL/ASR operand. - if (ThreeReg && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Extract the 5-bit immediate field Inst{11-7}. - unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; - ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; - if (Opcode == ARM::PKHBT) - Opc = ARM_AM::lsl; - else if (Opcode == ARM::PKHTB) - Opc = ARM_AM::asr; - getImmShiftSE(Opc, ShiftAmt); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); - ++OpIdx; - } - - return true; -} - -/// DisassembleSatFrm - Disassemble saturate instructions: -/// SSAT, SSAT16, USAT, and USAT16. -static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // A8.6.183 SSAT - // if d == 15 || n == 15 then UNPREDICTABLE; - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands - - // Disassemble register def. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - unsigned Pos = slice(insn, 20, 16); - if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16) - Pos += 1; - MI.addOperand(MCOperand::CreateImm(Pos)); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - if (NumOpsAdded == 4) { - ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 11, 7); - if (ShAmt == 0) { - // A8.6.183. Possible ASR shift amount of 32... 
- if (Opc == ARM_AM::asr) - ShAmt = 32; - else - Opc = ARM_AM::no_shift; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); - } - return true; -} - -// Extend instructions. -// SXT* and UXT*: Rd [Rn] Rm [rot_imm]. -// The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the -// three register operand form. Otherwise, Rn=0b1111 and only Rm is used. -static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // A8.6.220 SXTAB - // if d == 15 || m == 15 then UNPREDICTABLE; - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && "Expect 2 reg operands"); - - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - - // If there is still an operand info left which is an immediate operand, add - // an additional rotate immediate operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Extract the 2-bit rotate field Inst{11-10}. - unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3; - // Rotation by 8, 16, or 24 bits. - MI.addOperand(MCOperand::CreateImm(rot << 3)); - ++OpIdx; - } - - return true; -} - -///////////////////////////////////// -// // -// Utility Functions For VFP // -// // -///////////////////////////////////// - -// Extract/Decode Dd/Sd: -// -// SP => d = UInt(Vd:D) -// DP => d = UInt(D:Vd) -static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) { - return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn)) - : (decodeRd(insn) | getDBit(insn) << 4); -} - -// Extract/Decode Dn/Sn: -// -// SP => n = UInt(Vn:N) -// DP => n = UInt(N:Vn) -static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) { - return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn)) - : (decodeRn(insn) | getNBit(insn) << 4); -} - -// Extract/Decode Dm/Sm: -// -// SP => m = UInt(Vm:M) -// DP => m = UInt(M:Vm) -static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) { - return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn)) - : (decodeRm(insn) | getMBit(insn) << 4); -} - -// A7.5.1 -static APInt VFPExpandImm(unsigned char byte, unsigned N) { - assert(N == 32 || N == 64); - - uint64_t Result; - unsigned bit6 = slice(byte, 6, 6); - if (N == 32) { - Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19; - if (bit6) - Result |= 0x1f << 25; - else - Result |= 0x1 << 30; - } else { - Result = (uint64_t)slice(byte, 7, 7) << 63 | - (uint64_t)slice(byte, 5, 0) << 48; - if (bit6) - Result |= 0xffULL << 54; - else - Result |= 0x1ULL << 62; - } - return APInt(N, Result); -} - -// VFP Unary Format Instructions: -// -// VCMP[E]ZD, VCMP[E]ZS: compares one floating-point register with zero -// VCVTDS, VCVTSD: converts between double-precision and single-precision -// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. 
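// Illustrative worked example (added commentary; not part of the removed
// file) for decodeVFPRd() above: with D = Inst{22} = 1 and Vd = Inst{15-12}
// = 0b0101, the single-precision operand is Sd = UInt(Vd:D) = 0b01011 = S11,
// while the double-precision operand is Dd = UInt(D:Vd) = 0b10101 = D21.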
-static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - bool isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); - ++OpIdx; - - // Early return for compare with zero instructions. - if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS - || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS) - return true; - - RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); - ++OpIdx; - - return true; -} - -// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs. -// Some of them have operand constraints which tie the first operand in the -// InOperandList to that of the dst. As far as asm printing is concerned, this -// tied_to operand is simply skipped. -static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - bool isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); - ++OpIdx; - - // Skip tied_to operand constraint. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - assert(NumOps >= 4 && "Expect >=4 operands"); - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); - ++OpIdx; - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); - ++OpIdx; - - return true; -} - -// A8.6.295 vcvt (floating-point <-> integer) -// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS -// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S -// -// A8.6.297 vcvt (floating-point and fixed-point) -// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) -static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 - bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 - unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - if (fixed_point) { - // A8.6.297 - assert(NumOps >= 3 && "Expect >= 3 operands"); - int size = slice(insn, 7, 7) == 0 ? 
16 : 32; - int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5)); - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClassID, - decodeVFPRd(insn, SP)))); - - assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && - "Tied to operand expected"); - MI.addOperand(MI.getOperand(0)); - - assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && - !OpInfo[2].isOptionalDef() && "Imm operand expected"); - MI.addOperand(MCOperand::CreateImm(fbits)); - - NumOpsAdded = 3; - } else { - // A8.6.295 - // The Rd (destination) and Rm (source) bits have different interpretations - // depending on their single-precisonness. - unsigned d, m; - if (slice(insn, 18, 18) == 1) { // to_integer operation - d = decodeVFPRd(insn, true /* Is Single Precision */); - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::SPRRegClassID, d))); - m = decodeVFPRm(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m))); - } else { - d = decodeVFPRd(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d))); - m = decodeVFPRm(insn, true /* Is Single Precision */); - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::SPRRegClassID, m))); - } - NumOpsAdded = 2; - } - - return true; -} - -// VMOVRS - A8.6.330 -// Rt => Rd; Sn => UInt(Vn:N) -static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - decodeVFPRn(insn, true)))); - NumOpsAdded = 2; - return true; -} - -// VMOVRRD - A8.6.332 -// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm) -// -// VMOVRRS - A8.6.331 -// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 -static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - OpIdx = 2; - - if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { - unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm+1))); - OpIdx += 2; - } else { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::DPRRegClassID, - decodeVFPRm(insn, false)))); - ++OpIdx; - } - return true; -} - -// VMOVSR - A8.6.330 -// Rt => Rd; Sn => UInt(Vn:N) -static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - decodeVFPRn(insn, true)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - NumOpsAdded = 2; - return true; -} - -// VMOVDRR - A8.6.332 -// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm) -// -// VMOVRRS - A8.6.331 -// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 -static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, 
uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { - unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm+1))); - OpIdx += 2; - } else { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::DPRRegClassID, - decodeVFPRm(insn, false)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - OpIdx += 2; - return true; -} - -// VFP Load/Store Instructions. -// VLDRD, VLDRS, VSTRD, VSTRS -static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); - - bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS); - unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - // Extract Dd/Sd for operand 0. - unsigned RegD = decodeVFPRd(insn, isSPVFP); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - MI.addOperand(MCOperand::CreateReg(Base)); - - // Next comes the AM5 Opcode. - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned char Imm8 = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8))); - - NumOpsAdded = 3; - - return true; -} - -// VFP Load/Store Multiple Instructions. -// We have an optional write back reg, the base, and two predicate operands. -// It is then followed by a reglist of either DPR(s) or SPR(s). -// -// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] -static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base, if necessary. - if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD || - Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD || - Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(Base)); - - // Handling the two predicate operands before the reglist. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondVal)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - - OpIdx += 3; - - bool isSPVFP = (Opcode == ARM::VLDMSIA || - Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMSIA || - Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); - unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - // Extract Dd/Sd. - unsigned RegD = decodeVFPRd(insn, isSPVFP); - - // Fill the variadic part of reglist. - unsigned char Imm8 = insn & 0xFF; - unsigned Regs = isSPVFP ? 
Imm8 : Imm8/2; - - // Apply some sanity checks before proceeding. - if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16)) - return false; - - for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, - RegD + i))); - ++OpIdx; - } - - return true; -} - -// Misc. VFP Instructions. -// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand) -// FCONSTD (DPR and a VFPf64Imm operand) -// FCONSTS (SPR and a VFPf32Imm operand) -// VMRS/VMSR (GPR operand) -static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - if (Opcode == ARM::FMSTAT) - return true; - - assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands"); - - unsigned RegEnum = 0; - switch (OpInfo[0].RegClass) { - case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); - break; - case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); - break; - case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); - break; - default: - assert(0 && "Invalid reg class id"); - return false; - } - - MI.addOperand(MCOperand::CreateReg(RegEnum)); - ++OpIdx; - - // Extract/decode the f64/f32 immediate. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // The asm syntax specifies the floating point value, not the 8-bit literal. - APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), - Opcode == ARM::FCONSTD ? 64 : 32); - APFloat immFP = APFloat(immRaw, true); - double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() : - immFP.convertToFloat(); - MI.addOperand(MCOperand::CreateFPImm(imm)); - - ++OpIdx; - } - - return true; -} - -// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file. -#include "ThumbDisassemblerCore.h" - -///////////////////////////////////////////////////// -// // -// Utility Functions For ARM Advanced SIMD // -// // -///////////////////////////////////////////////////// - -// The following NEON namings are based on A8.6.266 VABA, VABAL. Notice that -// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL. - -// A7.3 Register encoding - -// Extract/Decode NEON D/Vd: -// -// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for -// doubleword, Dd = UInt(D:Vd). We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// D = Inst{22}, Vd = Inst{15-12} -static unsigned decodeNEONRd(uint32_t insn) { - return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); -} - -// Extract/Decode NEON N/Vn: -// -// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for -// doubleword, Dn = UInt(N:Vn). We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// N = Inst{7}, Vn = Inst{19-16} -static unsigned decodeNEONRn(uint32_t insn) { - return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); -} - -// Extract/Decode NEON M/Vm: -// -// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for -// doubleword, Dm = UInt(M:Vm). 
We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// M = Inst{5}, Vm = Inst{3-0} -static unsigned decodeNEONRm(uint32_t insn) { - return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); -} - -namespace { -enum ElemSize { - ESizeNA = 0, - ESize8 = 8, - ESize16 = 16, - ESize32 = 32, - ESize64 = 64 -}; -} // End of unnamed namespace - -// size field -> Inst{11-10} -// index_align field -> Inst{7-4} -// -// The Lane Index interpretation depends on the Data Size: -// 8 (encoded as size = 0b00) -> Index = index_align[3:1] -// 16 (encoded as size = 0b01) -> Index = index_align[3:2] -// 32 (encoded as size = 0b10) -> Index = index_align[3] -// -// Ref: A8.6.317 VLD4 (single 4-element structure to one lane). -static unsigned decodeLaneIndex(uint32_t insn) { - unsigned size = insn >> 10 & 3; - assert((size == 0 || size == 1 || size == 2) && - "Encoding error: size should be either 0, 1, or 2"); - - unsigned index_align = insn >> 4 & 0xF; - return (index_align >> 1) >> size; -} - -// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4) -// op = Inst{5}, cmode = Inst{11-8} -// i = Inst{24} (ARM architecture) -// imm3 = Inst{18-16}, imm4 = Inst{3-0} -// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. -static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { - unsigned char op = (insn >> 5) & 1; - unsigned char cmode = (insn >> 8) & 0xF; - unsigned char Imm8 = ((insn >> 24) & 1) << 7 | - ((insn >> 16) & 7) << 4 | - (insn & 0xF); - return (op << 12) | (cmode << 8) | Imm8; -} - -// A8.6.339 VMUL, VMULL (by scalar) -// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7 -// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15 -static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize16: - return insn & 7; - case ESize32: - return insn & 0xF; - default: - assert(0 && "Unreachable code!"); - return 0; - } -} - -// A8.6.339 VMUL, VMULL (by scalar) -// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7 -// ESize32 => index = Inst{5} (M) D0-D15 -static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize16: - return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1); - case ESize32: - return (insn >> 5) & 1; - default: - assert(0 && "Unreachable code!"); - return 0; - } -} - -// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD) -// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}. 
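// Illustrative sketch (added commentary; not part of the removed file): a
// quadword operand uses only even doubleword indices, so the Q register
// number is half of the 5-bit D:Vd index produced by decodeNEONRd() above
// (this halving is what the getRegisterEnum() utility compensates for).
static unsigned decodeNEONQd(uint32_t insn) {   // hypothetical helper
  unsigned Dd = ((insn >> 22) & 1) << 4 | ((insn >> 12) & 0xF);  // D:Vd
  return Dd >> 1;                                // Qd = UInt(D:Vd<3:1>)
}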
-static unsigned decodeVCVTFractionBits(uint32_t insn) { - return 64 - ((insn >> 16) & 0x3F); -} - -// A8.6.302 VDUP (scalar) -// ESize8 => index = Inst{19-17} -// ESize16 => index = Inst{19-18} -// ESize32 => index = Inst{19} -static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize8: - return (insn >> 17) & 7; - case ESize16: - return (insn >> 18) & 3; - case ESize32: - return (insn >> 19) & 1; - default: - assert(0 && "Unspecified element size!"); - return 0; - } -} - -// A8.6.328 VMOV (ARM core register to scalar) -// A8.6.329 VMOV (scalar to ARM core register) -// ESize8 => index = Inst{21:6-5} -// ESize16 => index = Inst{21:6} -// ESize32 => index = Inst{21} -static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize8: - return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3); - case ESize16: - return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1); - case ESize32: - return ((insn >> 21) & 1); - default: - assert(0 && "Unspecified element size!"); - return 0; - } -} - -// Imm6 = Inst{21-16}, L = Inst{7} -// -// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI): -// case L:imm6 of -// '0001xxx' => esize = 8; shift_amount = imm6 - 8 -// '001xxxx' => esize = 16; shift_amount = imm6 - 16 -// '01xxxxx' => esize = 32; shift_amount = imm6 - 32 -// '1xxxxxx' => esize = 64; shift_amount = imm6 -// -// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN): -// case L:imm6 of -// '0001xxx' => esize = 8; shift_amount = 16 - imm6 -// '001xxxx' => esize = 16; shift_amount = 32 - imm6 -// '01xxxxx' => esize = 32; shift_amount = 64 - imm6 -// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6 -// -static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) { - ElemSize esize = ESizeNA; - unsigned L = (insn >> 7) & 1; - unsigned imm6 = (insn >> 16) & 0x3F; - if (L == 0) { - if (imm6 >> 3 == 1) - esize = ESize8; - else if (imm6 >> 4 == 1) - esize = ESize16; - else if (imm6 >> 5 == 1) - esize = ESize32; - else - assert(0 && "Wrong encoding of Inst{7:21-16}!"); - } else - esize = ESize64; - - if (LeftShift) - return esize == ESize64 ? imm6 : (imm6 - esize); - else - return esize == ESize64 ? (esize - imm6) : (2*esize - imm6); -} - -// A8.6.305 VEXT -// Imm4 = Inst{11-8} -static unsigned decodeN3VImm(uint32_t insn) { - return (insn >> 8) & 0xF; -} - -// VLD* -// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] -// VLD*LN* -// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx) -// VST* -// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... -// VST*LN* -// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)] -// -// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. -static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, - unsigned alignment, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - // At least one DPR register plus addressing mode #6. - assert(NumOps >= 3 && "Expect >= 3 operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // We have homogeneous NEON registers for Load/Store. - unsigned RegClass = 0; - - // Double-spaced registers have increments of 2. - unsigned Inc = DblSpaced ? 2 : 1; - - unsigned Rn = decodeRn(insn); - unsigned Rm = decodeRm(insn); - unsigned Rd = decodeNEONRd(insn); - - // A7.7.1 Advanced SIMD addressing mode. - bool WB = Rm != 15; - - // LLVM Addressing Mode #6. 
- unsigned RmEnum = 0; - if (WB && Rm != 13) - RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm); - - if (Store) { - // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's, - // then possible lane index. - assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - - if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - Rn))); - ++OpIdx; - } - - assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); - // addrmode6 := (ops GPR:$addr, i32imm) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - Rn))); - MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment - OpIdx += 2; - - if (WB) { - MI.addOperand(MCOperand::CreateReg(RmEnum)); - ++OpIdx; - } - - assert(OpIdx < NumOps && - (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID || - OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) && - "Reg operand expected"); - - RegClass = OpInfo[OpIdx].RegClass; - while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd))); - Rd += Inc; - ++OpIdx; - } - - // Handle possible lane index. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); - ++OpIdx; - } - - } else { - // Consume the DPR/QPR's, possible WB, AddrMode6, possible incrment reg, - // possible TIED_TO DPR/QPR's (ignored), then possible lane index. - RegClass = OpInfo[0].RegClass; - - while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd))); - Rd += Inc; - ++OpIdx; - } - - if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - Rn))); - ++OpIdx; - } - - assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); - // addrmode6 := (ops GPR:$addr, i32imm) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - Rn))); - MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment - OpIdx += 2; - - if (WB) { - MI.addOperand(MCOperand::CreateReg(RmEnum)); - ++OpIdx; - } - - while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { - assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 && - "Tied to operand expected"); - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Handle possible lane index. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); - ++OpIdx; - } - } - - // Accessing registers past the end of the NEON register file is not - // defined. - if (Rd > 32) - return false; - - return true; -} - -// A8.6.308, A8.6.311, A8.6.314, A8.6.317. 
-static bool Align4OneLaneInst(unsigned elem, unsigned size, - unsigned index_align, unsigned & alignment) { - unsigned bits = 0; - switch (elem) { - default: - return false; - case 1: - // A8.6.308 - if (size == 0) - return slice(index_align, 0, 0) == 0; - else if (size == 1) { - bits = slice(index_align, 1, 0); - if (bits != 0 && bits != 1) - return false; - if (bits == 1) - alignment = 16; - return true; - } else if (size == 2) { - bits = slice(index_align, 2, 0); - if (bits != 0 && bits != 3) - return false; - if (bits == 3) - alignment = 32; - return true;; - } - return true; - case 2: - // A8.6.311 - if (size == 0) { - if (slice(index_align, 0, 0) == 1) - alignment = 16; - return true; - } if (size == 1) { - if (slice(index_align, 0, 0) == 1) - alignment = 32; - return true; - } else if (size == 2) { - if (slice(index_align, 1, 1) != 0) - return false; - if (slice(index_align, 0, 0) == 1) - alignment = 64; - return true;; - } - return true; - case 3: - // A8.6.314 - if (size == 0) { - if (slice(index_align, 0, 0) != 0) - return false; - return true; - } if (size == 1) { - if (slice(index_align, 0, 0) != 0) - return false; - return true; - return true; - } else if (size == 2) { - if (slice(index_align, 1, 0) != 0) - return false; - return true;; - } - return true; - case 4: - // A8.6.317 - if (size == 0) { - if (slice(index_align, 0, 0) == 1) - alignment = 32; - return true; - } if (size == 1) { - if (slice(index_align, 0, 0) == 1) - alignment = 64; - return true; - } else if (size == 2) { - bits = slice(index_align, 1, 0); - if (bits == 3) - return false; - if (bits == 1) - alignment = 64; - else if (bits == 2) - alignment = 128; - return true;; - } - return true; - } -} - -// A7.7 -// If L (Inst{21}) == 0, store instructions. -// Find out about double-spaced-ness of the Opcode and pass it on to -// DisassembleNLdSt0(). -static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const StringRef Name = ARMInsts[Opcode].Name; - bool DblSpaced = false; - // 0 represents standard alignment, i.e., unaligned data access. - unsigned alignment = 0; - - unsigned elem = 0; // legal values: {1, 2, 3, 4} - if (Name.startswith("VST1") || Name.startswith("VLD1")) - elem = 1; - - if (Name.startswith("VST2") || Name.startswith("VLD2")) - elem = 2; - - if (Name.startswith("VST3") || Name.startswith("VLD3")) - elem = 3; - - if (Name.startswith("VST4") || Name.startswith("VLD4")) - elem = 4; - - if (Name.find("LN") != std::string::npos) { - // To one lane instructions. - // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane). - - // Utility function takes number of elements, size, and index_align. - if (!Align4OneLaneInst(elem, - slice(insn, 11, 10), - slice(insn, 7, 4), - alignment)) - return false; - - // <size> == 16 && Inst{5} == 1 --> DblSpaced = true - if (Name.endswith("16") || Name.endswith("16_UPD")) - DblSpaced = slice(insn, 5, 5) == 1; - - // <size> == 32 && Inst{6} == 1 --> DblSpaced = true - if (Name.endswith("32") || Name.endswith("32_UPD")) - DblSpaced = slice(insn, 6, 6) == 1; - } else if (Name.find("DUP") != std::string::npos) { - // Single element (or structure) to all lanes. - // Inst{9-8} encodes the number of element(s) in the structure, with: - // 0b00 (VLD1DUP) (for this, a bit makes sense only for data size 16 and 32. 
- // 0b01 (VLD2DUP) - // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0) - // 0b11 (VLD4DUP) - // - // Inst{7-6} encodes the data size, with: - // 0b00 => 8, 0b01 => 16, 0b10 => 32 - // - // Inst{4} (the a bit) encodes the align action (0: standard alignment) - unsigned elem = slice(insn, 9, 8) + 1; - unsigned a = slice(insn, 4, 4); - if (elem != 3) { - // 0b11 is not a valid encoding for Inst{7-6}. - if (slice(insn, 7, 6) == 3) - return false; - unsigned data_size = 8 << slice(insn, 7, 6); - // For VLD1DUP, a bit makes sense only for data size of 16 and 32. - if (a && data_size == 8) - return false; - - // Now we can calculate the alignment! - if (a) - alignment = elem * data_size; - } else { - if (a) { - // A8.6.315 VLD3 (single 3-element structure to all lanes) - // The a bit must be encoded as 0. - return false; - } - } - } else { - // Multiple n-element structures with type encoded as Inst{11-8}. - // See, for example, A8.6.316 VLD4 (multiple 4-element structures). - - // Inst{5-4} encodes alignment. - unsigned align = slice(insn, 5, 4); - switch (align) { - default: - break; - case 1: - alignment = 64; break; - case 2: - alignment = 128; break; - case 3: - alignment = 256; break; - } - - unsigned type = slice(insn, 11, 8); - // Reject UNDEFINED instructions based on type and align. - // Plus set DblSpaced flag where appropriate. - switch (elem) { - default: - break; - case 1: - // n == 1 - // A8.6.307 & A8.6.391 - if ((type == 7 && slice(align, 1, 1) == 1) || - (type == 10 && align == 3) || - (type == 6 && slice(align, 1, 1) == 1)) - return false; - break; - case 2: - // n == 2 && type == 0b1001 -> DblSpaced = true - // A8.6.310 & A8.6.393 - if ((type == 8 || type == 9) && align == 3) - return false; - DblSpaced = (type == 9); - break; - case 3: - // n == 3 && type == 0b0101 -> DblSpaced = true - // A8.6.313 & A8.6.395 - if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1) - return false; - DblSpaced = (type == 5); - break; - case 4: - // n == 4 && type == 0b0001 -> DblSpaced = true - // A8.6.316 & A8.6.397 - if (slice(insn, 7, 6) == 3) - return false; - DblSpaced = (type == 1); - break; - } - } - return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B); -} - -// VMOV (immediate) -// Qd/Dd imm -// VBIC (immediate) -// VORR (immediate) -// Qd/Dd imm src(=Qd/Dd) -static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - assert(NumOps >= 2 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, - decodeNEONRd(insn)))); - - ElemSize esize = ESizeNA; - switch (Opcode) { - case ARM::VMOVv8i8: - case ARM::VMOVv16i8: - esize = ESize8; - break; - case ARM::VMOVv4i16: - case ARM::VMOVv8i16: - case ARM::VMVNv4i16: - case ARM::VMVNv8i16: - case ARM::VBICiv4i16: - case ARM::VBICiv8i16: - case ARM::VORRiv4i16: - case ARM::VORRiv8i16: - esize = ESize16; - break; - case ARM::VMOVv2i32: - case ARM::VMOVv4i32: - case ARM::VMVNv2i32: - case ARM::VMVNv4i32: - case ARM::VBICiv2i32: - case ARM::VBICiv4i32: - case ARM::VORRiv2i32: - case ARM::VORRiv4i32: - esize = ESize32; - break; - case ARM::VMOVv1i64: - case ARM::VMOVv2i64: 
- esize = ESize64; - break; - default: - assert(0 && "Unexpected opcode!"); - return false; - } - - // One register and a modified immediate value. - // Add the imm operand. - MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize))); - - NumOpsAdded = 2; - - // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in. - if (NumOps >= 3 && - (OpInfo[2].RegClass == ARM::DPRRegClassID || - OpInfo[2].RegClass == ARM::QPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, - decodeNEONRd(insn)))); - NumOpsAdded += 1; - } - - return true; -} - -namespace { -enum N2VFlag { - N2V_None, - N2V_VectorDupLane, - N2V_VectorConvert_Between_Float_Fixed -}; -} // End of unnamed namespace - -// Vector Convert [between floating-point and fixed-point] -// Qd/Dd Qm/Dm [fbits] -// -// Vector Duplicate Lane (from scalar to all elements) Instructions. -// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q: -// Qd/Dd Dm index -// -// Vector Move Long: -// Qd Dm -// -// Vector Move Narrow: -// Dd Qm -// -// Others -static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opc]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - assert(NumOps >= 2 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 2 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - ElemSize esize = ESizeNA; - if (Flag == N2V_VectorDupLane) { - // VDUPLN has its index embedded. Its size can be inferred from the Opcode. - assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q && - "Unexpected Opcode"); - esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8 - : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16 - : ESize32); - } - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - // VPADAL... - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRm(insn)))); - ++OpIdx; - - // VZIP and others have two TIED_TO reg operands. - int Idx; - while (OpIdx < NumOps && - (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // Add TIED_TO operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Add the imm operand, if required. 
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - - unsigned imm = 0xFFFFFFFF; - - if (Flag == N2V_VectorDupLane) - imm = decodeNVLaneDupIndex(insn, esize); - if (Flag == N2V_VectorConvert_Between_Float_Fixed) - imm = decodeVCVTFractionBits(insn); - - assert(imm != 0xFFFFFFFF && "Internal error"); - MI.addOperand(MCOperand::CreateImm(imm)); - ++OpIdx; - } - - return true; -} - -static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_None, B); -} -static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed, B); -} -static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane, B); -} - -// Vector Shift [Accumulate] Instructions. -// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt -// -// Vector Shift Left Long (with maximum shift count) Instructions. -// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) -// -static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - assert(NumOps >= 3 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 3 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID || - OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) && - "Reg operand expected"); - - // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRm(insn)))); - ++OpIdx; - - assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); - - // Add the imm operand. - - // VSHLL has maximum shift count as the imm, inferred from its size. - unsigned Imm; - switch (Opcode) { - default: - Imm = decodeNVSAmt(insn, LeftShift); - break; - case ARM::VSHLLi8: - Imm = 8; - break; - case ARM::VSHLLi16: - Imm = 16; - break; - case ARM::VSHLLi32: - Imm = 32; - break; - } - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - - return true; -} - -// Left shift instructions. -static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, - B); -} -// Right shift instructions have different shift amount interpretation. 
-static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, - B); -} - -namespace { -enum N3VFlag { - N3V_None, - N3V_VectorExtract, - N3V_VectorShift, - N3V_Multiply_By_Scalar -}; -} // End of unnamed namespace - -// NEON Three Register Instructions with Optional Immediate Operand -// -// Vector Extract Instructions. -// Qd/Dd Qn/Dn Qm/Dm imm4 -// -// Vector Shift (Register) Instructions. -// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n) -// -// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions. -// Qd/Dd Qn/Dn RestrictedDm index -// -// Others -static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs. - assert(NumOps >= 3 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 3 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - bool VdVnVm = Flag == N3V_VectorShift ? false : true; - bool IsImm4 = Flag == N3V_VectorExtract ? true : false; - bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar ? true : false; - ElemSize esize = ESizeNA; - if (Flag == N3V_Multiply_By_Scalar) { - unsigned size = (insn >> 20) & 3; - if (size == 1) esize = ESize16; - if (size == 2) esize = ESize32; - assert (esize == ESize16 || esize == ESize32); - } - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - // VABA, VABAL, VBSLd, VBSLq, ... - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Dn = Inst{7:19-16} => NEON Rn - // or - // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[OpIdx].RegClass, - VdVnVm ? decodeNEONRn(insn) - : decodeNEONRm(insn)))); - ++OpIdx; - - // Dm = Inst{5:3-0} => NEON Rm - // or - // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise - // or - // Dn = Inst{7:19-16} => NEON Rn - unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize) - : decodeNEONRm(insn)) - : decodeNEONRn(insn); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); - ++OpIdx; - - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Add the imm operand. 
- unsigned Imm = 0; - if (IsImm4) - Imm = decodeN3VImm(insn); - else if (IsDmRestricted) - Imm = decodeRestrictedDmIndex(insn, esize); - else { - assert(0 && "Internal error: unreachable code!"); - return false; - } - - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - } - - return true; -} - -static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_None, B); -} -static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift, B); -} -static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract, B); -} -static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar, B); -} - -// Vector Table Lookup -// -// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm -// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm -// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm -// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm -static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::DPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass == ARM::DPRRegClassID && - "Expect >= 3 operands and first 3 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned Rn = decodeNEONRn(insn); - - // {Dn} encoded as len = 0b00 - // {Dn Dn+1} encoded as len = 0b01 - // {Dn Dn+1 Dn+2 } encoded as len = 0b10 - // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11 - unsigned Len = slice(insn, 9, 8) + 1; - - // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRd(insn)))); - ++OpIdx; - - // Process tied_to operand constraint. - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Do the <list> now. - for (unsigned i = 0; i < Len; ++i) { - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - Rn + i))); - ++OpIdx; - } - - // Dm (the index vector) - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && - "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRm(insn)))); - ++OpIdx; - - return true; -} - -// Vector Get Lane (move scalar to ARM core register) Instructions. 
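// [Illustrative aside on the VTBL/VTBX <list> handling above; not part of the
// deleted file. bits() and vtblList() are invented for this sketch; the real
// code uses decodeNEONRn(), slice() and getRegisterEnum().]
#include <cstdint>
static inline unsigned bits(uint32_t v, unsigned hi, unsigned lo) {
  return (v >> lo) & ((1u << (hi - lo + 1)) - 1);
}
// Writes the D-register numbers of the table list into List[0..3] and returns
// how many entries are valid (1 for VTBL1/VTBX1 ... 4 for VTBL4/VTBX4).
static unsigned vtblList(uint32_t insn, unsigned List[4]) {
  unsigned Dn  = bits(insn, 7, 7) << 4 | bits(insn, 19, 16); // Rn = Inst{7:19-16}
  unsigned Len = bits(insn, 9, 8) + 1;                       // Inst{9-8} + 1
  for (unsigned i = 0; i != Len; ++i)
    List[i] = Dn + i;                                        // Dn, Dn+1, ...
  return Len;
}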
-// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index -static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(MCID.getNumDefs() == 1 && NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass < 0 && - "Expect >= 3 operands with one dst operand"); - - ElemSize esize = - Opcode == ARM::VGETLNi32 ? ESize32 - : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16 - : ESize8); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRn(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); - - NumOpsAdded = 3; - return true; -} - -// Vector Set Lane (move ARM core register to scalar) Instructions. -// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index -static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(MCID.getNumDefs() == 1 && NumOps >= 3 && - OpInfo[0].RegClass == ARM::DPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && - OpInfo[2].RegClass == ARM::GPRRegClassID && - OpInfo[3].RegClass < 0 && - "Expect >= 3 operands with one dst operand"); - - ElemSize esize = - Opcode == ARM::VSETLNi8 ? ESize8 - : (Opcode == ARM::VSETLNi16 ? ESize16 - : ESize32); - - // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRn(insn)))); - - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); - - NumOpsAdded = 4; - return true; -} - -// Vector Duplicate Instructions (from ARM core register to all elements). 
-// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt -static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - assert(NumOps >= 2 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - OpInfo[1].RegClass == ARM::GPRRegClassID && - "Expect >= 2 operands and first 2 as reg operand"); - - unsigned RegClass = OpInfo[0].RegClass; - - // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, - decodeNEONRn(insn)))); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - NumOpsAdded = 2; - return true; -} - -static inline bool PreLoadOpcode(unsigned Opcode) { - switch(Opcode) { - case ARM::PLDi12: case ARM::PLDrs: - case ARM::PLDWi12: case ARM::PLDWrs: - case ARM::PLIi12: case ARM::PLIrs: - return true; - default: - return false; - } -} - -static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // Preload Data/Instruction requires either 2 or 3 operands. - // PLDi12, PLDWi12, PLIi12: addrmode_imm12 - // PLDrs, PLDWrs, PLIrs: ldst_so_reg - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12 - || Opcode == ARM::PLIi12) { - unsigned Imm12 = slice(insn, 11, 0); - bool Negative = getUBit(insn) == 0; - - // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12. - if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) { - DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n"); - MI.setOpcode(ARM::PLDi12); - } - - // -0 is represented specially. All other values are as normal. - int Offset = Negative ? -1 * Imm12 : Imm12; - if (Imm12 == 0 && Negative) - Offset = INT32_MIN; - - MI.addOperand(MCOperand::CreateImm(Offset)); - NumOpsAdded = 2; - } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... - getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); - NumOpsAdded = 3; - } - - return true; -} - -static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) { - // Inst{3-0} encodes the memory barrier option for the variants. - unsigned opt = slice(insn, 3, 0); - switch (opt) { - case ARM_MB::SY: case ARM_MB::ST: - case ARM_MB::ISH: case ARM_MB::ISHST: - case ARM_MB::NSH: case ARM_MB::NSHST: - case ARM_MB::OSH: case ARM_MB::OSHST: - MI.addOperand(MCOperand::CreateImm(opt)); - NumOpsAdded = 1; - return true; - default: - return false; - } - } - - switch (Opcode) { - case ARM::CLREX: - case ARM::NOP: - case ARM::TRAP: - case ARM::YIELD: - case ARM::WFE: - case ARM::WFI: - case ARM::SEV: - return true; - case ARM::SWP: - case ARM::SWPB: - // SWP, SWPB: Rd Rm Rn - // Delegate to DisassembleLdStExFrm().... 
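// [Illustrative aside on the PLD/PLI immediate form handled above; not part of
// the deleted file.] The offset is signed-magnitude: Inst{11-0} is the
// magnitude, the U bit gives the sign, and "-0" gets an INT32_MIN sentinel so
// the printer can tell it apart from "+0". Sketch, assuming the U bit is
// Inst{23} as in getUBit():
#include <cstdint>
static int preloadOffset(uint32_t insn) {
  int imm12 = insn & 0xFFF;                 // Inst{11-0}
  bool add  = (insn >> 23) & 1;             // U bit
  if (!add && imm12 == 0)
    return INT32_MIN;                       // special representation of -0
  return add ? imm12 : -imm12;
}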
- return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - default: - break; - } - - if (Opcode == ARM::SETEND) { - NumOpsAdded = 1; - MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9))); - return true; - } - - // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different - // opcodes which match the same real instruction. This is needed since there's - // no current handling of optional arguments. Fix here when a better handling - // of optional arguments is implemented. - if (Opcode == ARM::CPS3p) { // M = 1 - // Let's reject these impossible imod values by returning false: - // 1. (imod=0b01) - // - // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an - // invalid combination, so we just check for imod=0b00 here. - if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 3; - return true; - } - if (Opcode == ARM::CPS2p) { // mode = 0, M = 0 - // Let's reject these impossible imod values by returning false: - // 1. (imod=0b00,M=0) - // 2. (imod=0b01) - if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1 - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 1; - return true; - } - - // DBG has its option specified in Inst{3-0}. - if (Opcode == ARM::DBG) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; - } - - // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}). - if (Opcode == ARM::BKPT) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 | - slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; - } - - if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - assert(0 && "Unexpected misc instruction!"); - return false; -} - -/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. -/// We divide the disassembly task into different categories, with each one -/// corresponding to a specific instruction encoding format. There could be -/// exceptions when handling a specific format, and that is why the Opcode is -/// also present in the function prototype. -static const DisassembleFP FuncPtrs[] = { - &DisassemblePseudo, - &DisassembleMulFrm, - &DisassembleBrFrm, - &DisassembleBrMiscFrm, - &DisassembleDPFrm, - &DisassembleDPSoRegFrm, - &DisassembleLdFrm, - &DisassembleStFrm, - &DisassembleLdMiscFrm, - &DisassembleStMiscFrm, - &DisassembleLdStMulFrm, - &DisassembleLdStExFrm, - &DisassembleArithMiscFrm, - &DisassembleSatFrm, - &DisassembleExtFrm, - &DisassembleVFPUnaryFrm, - &DisassembleVFPBinaryFrm, - &DisassembleVFPConv1Frm, - &DisassembleVFPConv2Frm, - &DisassembleVFPConv3Frm, - &DisassembleVFPConv4Frm, - &DisassembleVFPConv5Frm, - &DisassembleVFPLdStFrm, - &DisassembleVFPLdStMulFrm, - &DisassembleVFPMiscFrm, - &DisassembleThumbFrm, - &DisassembleMiscFrm, - &DisassembleNGetLnFrm, - &DisassembleNSetLnFrm, - &DisassembleNDupFrm, - - // VLD and VST (including one lane) Instructions. 
- &DisassembleNLdSt, - - // A7.4.6 One register and a modified immediate value - // 1-Register Instructions with imm. - // LLVM only defines VMOVv instructions. - &DisassembleN1RegModImmFrm, - - // 2-Register Instructions with no imm. - &DisassembleN2RegFrm, - - // 2-Register Instructions with imm (vector convert float/fixed point). - &DisassembleNVCVTFrm, - - // 2-Register Instructions with imm (vector dup lane). - &DisassembleNVecDupLnFrm, - - // Vector Shift Left Instructions. - &DisassembleN2RegVecShLFrm, - - // Vector Shift Righ Instructions, which has different interpretation of the - // shift amount from the imm6 field. - &DisassembleN2RegVecShRFrm, - - // 3-Register Data-Processing Instructions. - &DisassembleN3RegFrm, - - // Vector Shift (Register) Instructions. - // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand) - &DisassembleN3RegVecShFrm, - - // Vector Extract Instructions. - &DisassembleNVecExtractFrm, - - // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long] - // By Scalar Instructions. - &DisassembleNVecMulScalarFrm, - - // Vector Table Lookup uses byte indexes in a control vector to look up byte - // values in a table and generate a new vector. - &DisassembleNVTBLFrm, - - NULL -}; - -/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder. -/// The general idea is to set the Opcode for the MCInst, followed by adding -/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates -/// to the Format-specific disassemble function for disassembly, followed by -/// TryPredicateAndSBitModifier() to do PredicateOperand and OptionalDefOperand -/// which follow the Dst/Src Operands. -bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { - // Stage 1 sets the Opcode. - MI.setOpcode(Opcode); - // If the number of operands is zero, we're done! - if (NumOps == 0) - return true; - - // Stage 2 calls the format-specific disassemble function to build the operand - // list. - if (Disasm == NULL) - return false; - unsigned NumOpsAdded = 0; - bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this); - - if (!OK || this->Err != 0) return false; - if (NumOpsAdded >= NumOps) - return true; - - // Stage 3 deals with operands unaccounted for after stage 2 is finished. - // FIXME: Should this be done selectively? - return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded); -} - -// A8.3 Conditional execution -// A8.3.1 Pseudocode details of conditional execution -// Condition bits '111x' indicate the instruction is always executed. -static uint32_t CondCode(uint32_t CondField) { - if (CondField == 0xF) - return ARMCC::AL; - return CondField; -} - -/// DoPredicateOperands - DoPredicateOperands process the predicate operands -/// of some Thumb instructions which come before the reglist operands. It -/// returns true if the two predicate operands have been processed. -bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, - uint32_t /* insn */, unsigned short NumOpsRemaining) { - - assert(NumOpsRemaining > 0 && "Invalid argument"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned Idx = MI.getNumOperands(); - - // First, we check whether this instr specifies the PredicateOperand through - // a pair of MCOperandInfos with isPredicate() property. 
- if (NumOpsRemaining >= 2 && - OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass < 0 && - OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) - { - // If we are inside an IT block, get the IT condition bits maintained via - // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). - // See also A2.5.2. - if (InITBlock()) - MI.addOperand(MCOperand::CreateImm(GetITCond())); - else - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - return true; - } - - return false; -} - -/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process -/// the possible Predicate and SBitModifier, to build the remaining MCOperand -/// constituents. -bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, - uint32_t insn, unsigned short NumOpsRemaining) { - - assert(NumOpsRemaining > 0 && "Invalid argument"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - const std::string &Name = ARMInsts[Opcode].Name; - unsigned Idx = MI.getNumOperands(); - uint64_t TSFlags = ARMInsts[Opcode].TSFlags; - - // First, we check whether this instr specifies the PredicateOperand through - // a pair of MCOperandInfos with isPredicate() property. - if (NumOpsRemaining >= 2 && - OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass < 0 && - OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) - { - // If we are inside an IT block, get the IT condition bits maintained via - // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). - // See also A2.5.2. - if (InITBlock()) - MI.addOperand(MCOperand::CreateImm(GetITCond())); - else { - if (Name.length() > 1 && Name[0] == 't') { - // Thumb conditional branch instructions have their cond field embedded, - // like ARM. - // - // A8.6.16 B - // Check for undefined encodings. - unsigned cond; - if (Name == "t2Bcc") { - if ((cond = slice(insn, 25, 22)) >= 14) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(cond))); - } else if (Name == "tBcc") { - if ((cond = slice(insn, 11, 8)) == 14) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(cond))); - } else - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - } else { - // ARM instructions get their condition field from Inst{31-28}. - // We should reject Inst{31-28} = 0b1111 as invalid encoding. - if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); - } - } - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - Idx += 2; - NumOpsRemaining -= 2; - } - - if (NumOpsRemaining == 0) - return true; - - // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. - if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); - --NumOpsRemaining; - } - - if (NumOpsRemaining == 0) - return true; - else - return false; -} - -/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary -/// after BuildIt is finished. -bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, - uint32_t insn) { - - if (!SP) return Status; - - if (Opcode == ARM::t2IT) - Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; - else if (InITBlock()) - SP->UpdateIT(); - - return Status; -} - -/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. 
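// [Illustrative aside on the predicate handling above; not part of the deleted
// file.] For ARM-mode encodings the logic reduces to: pull the 4-bit condition
// out of Inst{31-28}, treat 0b1111 as "always" only for NEON-domain
// instructions, and reject it otherwise. SKETCH_AL and armCond() are invented
// names standing in for ARMCC::AL and the real code paths:
#include <cstdint>
enum { SKETCH_AL = 14 };
// Returns the condition code, or -1 for an encoding this logic rejects.
static int armCond(uint32_t insn, bool isNEONDomainInsn) {
  unsigned cond = insn >> 28;                  // Inst{31-28}
  if (cond == 0xF)
    return isNEONDomainInsn ? SKETCH_AL : -1;  // 0b1111 only legal in NEON domain
  return (int)cond;
}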
-ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, - unsigned short num) - : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) { - unsigned Idx = (unsigned)format; - assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); - Disasm = FuncPtrs[Idx]; -} - -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. -/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. -ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { - // For "Unknown format", fail by returning a NULL pointer. - if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) { - DEBUG(errs() << "Unknown format\n"); - return 0; - } - - return new ARMBasicMCBuilder(Opcode, Format, - ARMInsts[Opcode].getNumOperands()); -} - -/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic -/// operand in place of the immediate Value in the MCInst. The immediate -/// Value has had any PC adjustment made by the caller. If the getOpInfo() -/// function was set as part of the setupBuilderForSymbolicDisassembly() call -/// then that function is called to get any symbolic information at the -/// builder's Address for this instrution. If that returns non-zero then the -/// symbolic information it returns is used to create an MCExpr and that is -/// added as an operand to the MCInst. This function returns true if it adds -/// an operand to the MCInst and false otherwise. -bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, - uint64_t InstSize, - MCInst &MI) { - if (!GetOpInfo) - return false; - - struct LLVMOpInfo1 SymbolicOp; - SymbolicOp.Value = Value; - if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) - return false; - - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == 
LLVMDisassembler_VariantKind_None) - MI.addOperand(MCOperand::CreateExpr(Expr)); - else - assert("bad SymbolicOp.VariantKind"); - - return true; -} diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h deleted file mode 100644 index a7ba141..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ /dev/null @@ -1,336 +0,0 @@ -//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// -// The first part defines the enumeration type of ARM instruction format, which -// specifies the encoding used by the instruction, as well as a helper function -// to convert the enums to printable char strings. -// -// It also contains code to represent the concepts of Builder and DisassembleFP -// to solve the problem of disassembling an ARM instr. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMDISASSEMBLERCORE_H -#define ARMDISASSEMBLERCORE_H - -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm-c/Disassembler.h" -#include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" -#include "ARMDisassembler.h" - -namespace llvm { -class MCContext; - -class ARMUtils { -public: - static const char *OpcodeName(unsigned Opcode); -}; - -///////////////////////////////////////////////////// -// // -// Enums and Utilities for ARM Instruction Format // -// // -///////////////////////////////////////////////////// - -#define ARM_FORMATS \ - ENTRY(ARM_FORMAT_PSEUDO, 0) \ - ENTRY(ARM_FORMAT_MULFRM, 1) \ - ENTRY(ARM_FORMAT_BRFRM, 2) \ - ENTRY(ARM_FORMAT_BRMISCFRM, 3) \ - ENTRY(ARM_FORMAT_DPFRM, 4) \ - ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \ - ENTRY(ARM_FORMAT_LDFRM, 6) \ - ENTRY(ARM_FORMAT_STFRM, 7) \ - ENTRY(ARM_FORMAT_LDMISCFRM, 8) \ - ENTRY(ARM_FORMAT_STMISCFRM, 9) \ - ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \ - ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \ - ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \ - ENTRY(ARM_FORMAT_SATFRM, 13) \ - ENTRY(ARM_FORMAT_EXTFRM, 14) \ - ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \ - ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \ - ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \ - ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \ - ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \ - ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \ - ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \ - ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \ - ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \ - ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \ - ENTRY(ARM_FORMAT_THUMBFRM, 25) \ - ENTRY(ARM_FORMAT_MISCFRM, 26) \ - ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \ - ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \ - ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \ - ENTRY(ARM_FORMAT_NLdSt, 30) \ - ENTRY(ARM_FORMAT_N1RegModImm, 31) \ - ENTRY(ARM_FORMAT_N2Reg, 32) \ - ENTRY(ARM_FORMAT_NVCVT, 33) \ - ENTRY(ARM_FORMAT_NVecDupLn, 34) \ - ENTRY(ARM_FORMAT_N2RegVecShL, 35) \ - ENTRY(ARM_FORMAT_N2RegVecShR, 36) \ - ENTRY(ARM_FORMAT_N3Reg, 37) \ - ENTRY(ARM_FORMAT_N3RegVecSh, 38) \ - ENTRY(ARM_FORMAT_NVecExtract, 39) \ - ENTRY(ARM_FORMAT_NVecMulScalar, 40) \ - ENTRY(ARM_FORMAT_NVTBL, 41) - -// ARM instruction format specifies the encoding used by the instruction. 
-#define ENTRY(n, v) n = v, -typedef enum { - ARM_FORMATS - ARM_FORMAT_NA -} ARMFormat; -#undef ENTRY - -// Converts enum to const char*. -static const inline char *stringForARMFormat(ARMFormat form) { -#define ENTRY(n, v) case n: return #n; - switch(form) { - ARM_FORMATS - case ARM_FORMAT_NA: - default: - return ""; - } -#undef ENTRY -} - -/// Expands on the enum definitions from ARMBaseInstrInfo.h. -/// They are being used by the disassembler implementation. -namespace ARMII { - enum { - NEONRegMask = 15, - GPRRegMask = 15, - NEON_RegRdShift = 12, - NEON_D_BitShift = 22, - NEON_RegRnShift = 16, - NEON_N_BitShift = 7, - NEON_RegRmShift = 0, - NEON_M_BitShift = 5 - }; -} - -/// Utility function for extracting [From, To] bits from a uint32_t. -static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) { - assert(From < 32 && To < 32 && From >= To); - return (Bits >> To) & ((1 << (From - To + 1)) - 1); -} - -/// Utility function for setting [From, To] bits to Val for a uint32_t. -static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, - unsigned Val) { - assert(From < 32 && To < 32 && From >= To); - uint32_t Mask = ((1 << (From - To + 1)) - 1); - Bits &= ~(Mask << To); - Bits |= (Val & Mask) << To; -} - -// Return an integer result equal to the number of bits of x that are ones. -static inline uint32_t -BitCount (uint64_t x) -{ - // c accumulates the total bits set in x - uint32_t c; - for (c = 0; x; ++c) - { - x &= x - 1; // clear the least significant bit set - } - return c; -} - -static inline bool -BitIsSet (const uint64_t value, const uint64_t bit) -{ - return (value & (1ull << bit)) != 0; -} - -static inline bool -BitIsClear (const uint64_t value, const uint64_t bit) -{ - return (value & (1ull << bit)) == 0; -} - -/// Various utilities for checking the target specific flags. - -/// A unary data processing instruction doesn't have an Rn operand. -static inline bool isUnaryDP(uint64_t TSFlags) { - return (TSFlags & ARMII::UnaryDP); -} - -/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111. -static inline bool isNEONDomain(uint64_t TSFlags) { - return (TSFlags & ARMII::DomainNEON) || - (TSFlags & ARMII::DomainNEONA8); -} - -/// This four-bit field describes the addressing mode used. -/// See also ARMBaseInstrInfo.h. -static inline unsigned getAddrMode(uint64_t TSFlags) { - return (TSFlags & ARMII::AddrModeMask); -} - -/// {IndexModePre, IndexModePost} -/// Only valid for load and store ops. -/// See also ARMBaseInstrInfo.h. -static inline unsigned getIndexMode(uint64_t TSFlags) { - return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; -} - -/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList. -static inline bool isPrePostLdSt(uint64_t TSFlags) { - return (TSFlags & ARMII::IndexModeMask) != 0; -} - -// Forward declaration. -class ARMBasicMCBuilder; - -// Builder Object is mostly ignored except in some Thumb disassemble functions. -typedef ARMBasicMCBuilder *BO; - -/// DisassembleFP - DisassembleFP points to a function that disassembles an insn -/// and builds the MCOperand list upon disassembly. It returns false on failure -/// or true on success. The number of operands added is updated upon success. -typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); - -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. 
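// [Illustrative aside, not part of the original header.] The shift/mask
// constants above are all the NEON register decoding needs: each register
// number is a 4-bit field plus one extra high bit, e.g. Rd = Inst{22:15-12},
// Rn = Inst{7:19-16}, Rm = Inst{5:3-0}. A sketch restating what the real
// decodeNEONRd/Rn/Rm helpers compute (bits() mirrors slice() above):
#include <cstdint>
static inline unsigned bits(uint32_t v, unsigned hi, unsigned lo) {
  return (v >> lo) & ((1u << (hi - lo + 1)) - 1);
}
static unsigned sketchNEONRd(uint32_t insn) {
  return bits(insn, 22, 22) << 4 | bits(insn, 15, 12);  // D : Vd
}
static unsigned sketchNEONRn(uint32_t insn) {
  return bits(insn, 7, 7) << 4 | bits(insn, 19, 16);    // N : Vn
}
static unsigned sketchNEONRm(uint32_t insn) {
  return bits(insn, 5, 5) << 4 | bits(insn, 3, 0);      // M : Vm
}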
-/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. -extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - -/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that -/// knows how to build up the MCOperand list. -class ARMBasicMCBuilder { - friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - unsigned Opcode; - ARMFormat Format; - unsigned short NumOps; - DisassembleFP Disasm; - Session *SP; - int Err; // !=0 if the builder encounters some error condition during build. - -private: - /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. - ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); - -public: - ARMBasicMCBuilder(ARMBasicMCBuilder &B) - : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { - Err = 0; - } - - virtual ~ARMBasicMCBuilder() {} - - void SetSession(Session *sp) { - SP = sp; - } - - void SetErr(int ErrCode) { - Err = ErrCode; - } - - /// DoPredicateOperands - DoPredicateOperands process the predicate operands - /// of some Thumb instructions which come before the reglist operands. It - /// returns true if the two predicate operands have been processed. - bool DoPredicateOperands(MCInst& MI, unsigned Opcode, - uint32_t insn, unsigned short NumOpsRemaning); - - /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process - /// the possible Predicate and SBitModifier, to build the remaining MCOperand - /// constituents. - bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, - uint32_t insn, unsigned short NumOpsRemaning); - - /// InITBlock - InITBlock returns true if we are inside an IT block. - bool InITBlock() { - if (SP) - return SP->ITCounter > 0; - - return false; - } - - /// Build - Build delegates to BuildIt to perform the heavy liftling. After - /// that, it invokes RunBuildAfterHook where some housekeepings can be done. - virtual bool Build(MCInst &MI, uint32_t insn) { - bool Status = BuildIt(MI, insn); - return RunBuildAfterHook(Status, MI, insn); - } - - /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder. - /// The general idea is to set the Opcode for the MCInst, followed by adding - /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates - /// to the Format-specific disassemble function for disassembly, followed by - /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand - /// which follow the Dst/Src Operands. - virtual bool BuildIt(MCInst &MI, uint32_t insn); - - /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary - /// after BuildIt is finished. - virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn); - -private: - /// Get condition of the current IT instruction. - unsigned GetITCond() { - assert(SP); - return slice(SP->ITState, 7, 4); - } - -private: - // - // Hooks for symbolic disassembly via the public 'C' interface. - // - // The function to get the symbolic information for operands. - LLVMOpInfoCallback GetOpInfo; - // The pointer to the block of symbolic information for above call back. - void *DisInfo; - // The assembly context for creating symbols and MCExprs in place of - // immediate operands when there is symbolic information. 
- MCContext *Ctx; - // The address of the instruction being disassembled. - uint64_t Address; - -public: - void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, - void *disInfo, MCContext *ctx, - uint64_t address) { - GetOpInfo = getOpInfo; - DisInfo = disInfo; - Ctx = ctx; - Address = address; - } - - uint64_t getBuilderAddress() const { return Address; } - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has had any PC adjustment made by the caller. If the getOpInfo() - /// function was set as part of the setupBuilderForSymbolicDisassembly() call - /// then that function is called to get any symbolic information at the - /// builder's Address for this instrution. If that returns non-zero then the - /// symbolic information it returns is used to create an MCExpr and that is - /// added as an operand to the MCInst. This function returns true if it adds - /// an operand to the MCInst and false otherwise. - bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); - -}; - -} // namespace llvm - -#endif diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h deleted file mode 100644 index 834c6f6..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ /dev/null @@ -1,2459 +0,0 @@ -//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// It contains code for disassembling a Thumb instr. It is to be included by -// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm() -// function which acts as the dispatcher to disassemble a Thumb instruction. -// -//===----------------------------------------------------------------------===// - -/////////////////////////////// -// // -// Utility Functions // -// // -/////////////////////////////// - -// Utilities for 16-bit Thumb instructions. -/* -15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 - [ tRt ] - [ tRm ] [ tRn ] [ tRd ] - D [ Rm ] [ Rd ] - - [ imm3] - [ imm5 ] - i [ imm5 ] - [ imm7 ] - [ imm8 ] - [ imm11 ] - - [ cond ] -*/ - -// Extract tRt: Inst{10-8}. -static inline unsigned getT1tRt(uint32_t insn) { - return slice(insn, 10, 8); -} - -// Extract tRm: Inst{8-6}. -static inline unsigned getT1tRm(uint32_t insn) { - return slice(insn, 8, 6); -} - -// Extract tRn: Inst{5-3}. -static inline unsigned getT1tRn(uint32_t insn) { - return slice(insn, 5, 3); -} - -// Extract tRd: Inst{2-0}. -static inline unsigned getT1tRd(uint32_t insn) { - return slice(insn, 2, 0); -} - -// Extract [D:Rd]: Inst{7:2-0}. -static inline unsigned getT1Rd(uint32_t insn) { - return slice(insn, 7, 7) << 3 | slice(insn, 2, 0); -} - -// Extract Rm: Inst{6-3}. -static inline unsigned getT1Rm(uint32_t insn) { - return slice(insn, 6, 3); -} - -// Extract imm3: Inst{8-6}. -static inline unsigned getT1Imm3(uint32_t insn) { - return slice(insn, 8, 6); -} - -// Extract imm5: Inst{10-6}. -static inline unsigned getT1Imm5(uint32_t insn) { - return slice(insn, 10, 6); -} - -// Extract i:imm5: Inst{9:7-3}. 
-static inline unsigned getT1Imm6(uint32_t insn) { - return slice(insn, 9, 9) << 5 | slice(insn, 7, 3); -} - -// Extract imm7: Inst{6-0}. -static inline unsigned getT1Imm7(uint32_t insn) { - return slice(insn, 6, 0); -} - -// Extract imm8: Inst{7-0}. -static inline unsigned getT1Imm8(uint32_t insn) { - return slice(insn, 7, 0); -} - -// Extract imm11: Inst{10-0}. -static inline unsigned getT1Imm11(uint32_t insn) { - return slice(insn, 10, 0); -} - -// Extract cond: Inst{11-8}. -static inline unsigned getT1Cond(uint32_t insn) { - return slice(insn, 11, 8); -} - -static inline bool IsGPR(unsigned RegClass) { - return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID; -} - -// Utilities for 32-bit Thumb instructions. - -static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; } - -// Extract imm4: Inst{19-16}. -static inline unsigned getImm4(uint32_t insn) { - return slice(insn, 19, 16); -} - -// Extract imm3: Inst{14-12}. -static inline unsigned getImm3(uint32_t insn) { - return slice(insn, 14, 12); -} - -// Extract imm8: Inst{7-0}. -static inline unsigned getImm8(uint32_t insn) { - return slice(insn, 7, 0); -} - -// A8.6.61 LDRB (immediate, Thumb) and friends -// +/-: Inst{9} -// imm8: Inst{7-0} -static inline int decodeImm8(uint32_t insn) { - int Offset = getImm8(insn); - return slice(insn, 9, 9) ? Offset : -Offset; -} - -// Extract imm12: Inst{11-0}. -static inline unsigned getImm12(uint32_t insn) { - return slice(insn, 11, 0); -} - -// A8.6.63 LDRB (literal) and friends -// +/-: Inst{23} -// imm12: Inst{11-0} -static inline int decodeImm12(uint32_t insn) { - int Offset = getImm12(insn); - return slice(insn, 23, 23) ? Offset : -Offset; -} - -// Extract imm2: Inst{7-6}. -static inline unsigned getImm2(uint32_t insn) { - return slice(insn, 7, 6); -} - -// For BFI, BFC, t2SBFX, and t2UBFX. -// Extract lsb: Inst{14-12:7-6}. -static inline unsigned getLsb(uint32_t insn) { - return getImm3(insn) << 2 | getImm2(insn); -} - -// For BFI and BFC. -// Extract msb: Inst{4-0}. -static inline unsigned getMsb(uint32_t insn) { - return slice(insn, 4, 0); -} - -// For t2SBFX and t2UBFX. -// Extract widthminus1: Inst{4-0}. -static inline unsigned getWidthMinus1(uint32_t insn) { - return slice(insn, 4, 0); -} - -// For t2ADDri12 and t2SUBri12. -// imm12 = i:imm3:imm8; -static inline unsigned getIImm3Imm8(uint32_t insn) { - return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn); -} - -// For t2MOVi16 and t2MOVTi16. -// imm16 = imm4:i:imm3:imm8; -static inline unsigned getImm16(uint32_t insn) { - return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 | - getImm3(insn) << 8 | getImm8(insn); -} - -// Inst{5-4} encodes the shift type. -static inline unsigned getShiftTypeBits(uint32_t insn) { - return slice(insn, 5, 4); -} - -// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount. 
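// [Illustrative aside, not part of the original header.] getImm16() above just
// re-concatenates the four scattered fields of a T2 MOVW/MOVT immediate,
// imm16 = imm4:i:imm3:imm8. Worked example with arbitrarily chosen field
// values: i = 1, imm4 = 0xA, imm3 = 0b010, imm8 = 0x5F gives an insn of
// (1 << 26) | (0xA << 16) | (2 << 12) | 0x5F = 0x040A205F and imm16 = 0xAA5F.
#include <cstdint>
static unsigned sketchImm16(uint32_t insn) {
  unsigned imm4 = (insn >> 16) & 0xF;      // Inst{19-16}
  unsigned i    = (insn >> 26) & 0x1;      // Inst{26}
  unsigned imm3 = (insn >> 12) & 0x7;      // Inst{14-12}
  unsigned imm8 = insn & 0xFF;             // Inst{7-0}
  return imm4 << 12 | i << 11 | imm3 << 8 | imm8;
}
// Sanity check for the worked example: sketchImm16(0x040A205F) == 0xAA5F.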
-static inline unsigned getShiftAmtBits(uint32_t insn) { - return getImm3(insn) << 2 | getImm2(insn); -} - -// A8.6.17 BFC -// Encoding T1 ARMv6T2, ARMv7 -// LLVM-specific encoding for #<lsb> and #<width> -static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) { - uint32_t lsb = getImm3(insn) << 2 | getImm2(insn); - uint32_t msb = getMsb(insn); - uint32_t Val = 0; - if (msb < lsb) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - for (uint32_t i = lsb; i <= msb; ++i) - Val |= (1 << i); - mask = ~Val; - return true; -} - -// A8.4 Shifts applied to a register -// A8.4.1 Constant shifts -// A8.4.3 Pseudocode details of instruction-specified shifts and rotates -// -// decodeImmShift() returns the shift amount and the the shift opcode. -// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet. -static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5, - ARM_AM::ShiftOpc &ShOp) { - - assert(imm5 < 32 && "Invalid imm5 argument"); - switch (bits2) { - default: assert(0 && "No such value"); - case 0: - ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl); - return imm5; - case 1: - ShOp = ARM_AM::lsr; - return (imm5 == 0 ? 32 : imm5); - case 2: - ShOp = ARM_AM::asr; - return (imm5 == 0 ? 32 : imm5); - case 3: - ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror); - return (imm5 == 0 ? 1 : imm5); - } -} - -// A6.3.2 Modified immediate constants in Thumb instructions -// -// ThumbExpandImm() returns the modified immediate constant given an imm12 for -// Thumb data-processing instructions with modified immediate. -// See also A6.3.1 Data-processing (modified immediate). -static inline unsigned ThumbExpandImm(unsigned imm12) { - assert(imm12 <= 0xFFF && "Invalid imm12 argument"); - - // If the leading two bits is 0b00, the modified immediate constant is - // obtained by splatting the low 8 bits into the first byte, every other byte, - // or every byte of a 32-bit value. - // - // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is - // performed. - - if (slice(imm12, 11, 10) == 0) { - unsigned short control = slice(imm12, 9, 8); - unsigned imm8 = slice(imm12, 7, 0); - switch (control) { - default: - assert(0 && "No such value"); - return 0; - case 0: - return imm8; - case 1: - return imm8 << 16 | imm8; - case 2: - return imm8 << 24 | imm8 << 8; - case 3: - return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8; - } - } else { - // A rotate is required. 
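// [Worked examples for ThumbExpandImm(), added here as an aside; not in the
// original header.]
//   Splat case:  imm12 = 0x2AB -> top bits 0b00, control = 0b10, imm8 = 0xAB,
//                result = 0xAB << 24 | 0xAB << 8 = 0xAB00AB00.
//   Rotate case: imm12 = 0xCFF -> Val = 0x80 | 0x7F = 0xFF, Amt = 0b11001 = 25,
//                result = rotr32(0xFF, 25) = 0x00007F80.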
- unsigned Val = 1 << 7 | slice(imm12, 6, 0); - unsigned Amt = slice(imm12, 11, 7); - return ARM_AM::rotr32(Val, Amt); - } -} - -static inline int decodeImm32_B_EncodingT3(uint32_t insn) { - bool S = slice(insn, 26, 26); - bool J1 = slice(insn, 13, 13); - bool J2 = slice(insn, 11, 11); - unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm21 |= 1 << 20; - if (J2) Imm21 |= 1 << 19; - if (J1) Imm21 |= 1 << 18; - - return SignExtend32<21>(Imm21); -} - -static inline int decodeImm32_B_EncodingT4(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -static inline int decodeImm32_BL(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -static inline int decodeImm32_BLX(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -// See, for example, A8.6.221 SXTAB16. -static inline unsigned decodeRotate(uint32_t insn) { - unsigned rotate = slice(insn, 5, 4); - return rotate << 3; -} - -/////////////////////////////////////////////// -// // -// Thumb1 instruction disassembly functions. // -// // -/////////////////////////////////////////////// - -// See "Utilities for 16-bit Thumb instructions" for register naming convention. - -// A6.2.1 Shift (immediate), add, subtract, move, and compare -// -// shift immediate: tRd CPSR tRn imm5 -// add/sub register: tRd CPSR tRn tRm -// add/sub 3-bit immediate: tRd CPSR tRn imm3 -// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8 -// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov) -// -// Special case: -// tMOVSr: tRd tRn -static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID - && "Invalid arguments"); - - bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3); - - // Use Rt implies use imm8. - bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 || - Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::tGPRRegClassID, - UseRt ? getT1tRt(insn) : getT1tRd(insn)))); - ++OpIdx; - - // Check whether the next operand to be added is a CCR Register. - if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { - assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); - ++OpIdx; - } - - // Check whether the next operand to be added is a Thumb1 Register. 
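// [Illustrative worked example for the 16-bit field helpers used by
// DisassembleThumb1General(); not part of the original header.] If I have the
// T1 encoding right, 0x185A is ADDS r2, r3, r1 (opcode bits 15-9 = 0b0001100,
// then Rm:Rn:Rd), and the getT1t* helpers recover the registers directly:
//   getT1tRm(0x185A) = Inst{8-6} = 1  -> r1
//   getT1tRn(0x185A) = Inst{5-3} = 3  -> r3
//   getT1tRd(0x185A) = Inst{2-0} = 2  -> r2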
- assert(OpIdx < NumOps && "More operands expected"); - if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // For UseRt, the reg operand is tied to the first reg operand. - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::tGPRRegClassID, - UseRt ? getT1tRt(insn) : getT1tRn(insn)))); - ++OpIdx; - } - - // Special case for tMOVSr. - if (OpIdx == NumOps) - return true; - - // The next available operand is either a reg operand or an imm operand. - if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // Three register operand instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - } else { - assert(OpInfo[OpIdx].RegClass < 0 && - !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - unsigned Imm = 0; - if (UseRt) - Imm = getT1Imm8(insn); - else if (Imm3) - Imm = getT1Imm3(insn); - else { - Imm = getT1Imm5(insn); - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11)); - getImmShiftSE(ShOp, Imm); - } - MI.addOperand(MCOperand::CreateImm(Imm)); - } - ++OpIdx; - - return true; -} - -// A6.2.2 Data-processing -// -// tCMPr, tTST, tCMN: tRd tRn -// tMVN, tRSB: tRd CPSR tRn -// Others: tRd CPSR tRd(TIED_TO) tRn -static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == ARM::CCRRegClassID - || OpInfo[1].RegClass == ARM::tGPRRegClassID) - && "Invalid arguments"); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRd(insn)))); - ++OpIdx; - - // Check whether the next operand to be added is a CCR Register. - if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { - assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); - ++OpIdx; - } - - // We have either { tRd(TIED_TO), tRn } or { tRn } remaining. - // Process the TIED_TO operand first. - - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // The reg operand is tied to the first reg operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Process possible next reg operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRn(insn)))); - ++OpIdx; - } - - return true; -} - -// A6.2.3 Special data instructions and branch and exchange -// -// tADDhirr: Rd Rd(TIED_TO) Rm -// tCMPhir: Rd Rm -// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn -// tBX: Rm -// tBX_RET: 0 operand -// tBX_RET_vararg: Rm -// tBLXr_r9: Rm -// tBRIND: Rm -static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // tBX_RET has 0 operand. - if (NumOps == 0) - return true; - - // BX/BLX/tBRIND (indirect branch, i.e, mov pc, Rm) has 1 reg operand: Rm. - if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) { - if (Opcode == ARM::tBLXr_r9) { - // Handling the two predicate operands before the reg operand. 
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - return false; - NumOpsAdded += 2; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - getT1Rm(insn)))); - NumOpsAdded += 1; - - if (Opcode == ARM::tBX) { - // Handling the two predicate operands after the reg operand. - if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - return false; - NumOpsAdded += 2; - } - - return true; - } - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Add the destination operand. - unsigned RegClass = OpInfo[OpIdx].RegClass; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, - IsGPR(RegClass) ? getT1Rd(insn) - : getT1tRd(insn)))); - ++OpIdx; - - // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining. - // Process the TIED_TO operand first. - - assert(OpIdx < NumOps && "More operands expected"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // The reg operand is tied to the first reg operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // The next reg operand is either Rm or tRn. - assert(OpIdx < NumOps && "More operands expected"); - RegClass = OpInfo[OpIdx].RegClass; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, - IsGPR(RegClass) ? getT1Rm(insn) - : getT1tRn(insn)))); - ++OpIdx; - - return true; -} - -// A8.6.59 LDR (literal) -// -// tLDRpci: tRt imm8*4 -static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass < 0 && - !OpInfo[1].isPredicate() && - !OpInfo[1].isOptionalDef()) - && "Invalid arguments"); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - - // And the (imm8 << 2) operand. - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2)); - - NumOpsAdded = 2; - - return true; -} - -// Thumb specific addressing modes (see ARMInstrThumb.td): -// -// t_addrmode_rr := reg + reg -// -// t_addrmode_s4 := reg + reg -// reg + imm5 * 4 -// -// t_addrmode_s2 := reg + reg -// reg + imm5 * 2 -// -// t_addrmode_s1 := reg + reg -// reg + imm5 -// -// t_addrmode_sp := sp + imm8 * 4 -// - -// A8.6.63 LDRB (literal) -// A8.6.79 LDRSB (literal) -// A8.6.75 LDRH (literal) -// A8.6.83 LDRSH (literal) -// A8.6.59 LDR (literal) -// -// These instrs calculate an address from the PC value and an immediate offset. -// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) -static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass < 0 && - "Expect >= 2 operands, first as reg, and second as imm operand"); - - // Build the register operand, followed by the (+/-)imm12 immediate. 
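// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): for these literal loads the register operand
// is the destination and the immediate is a signed offset from the
// word-aligned PC, so a consumer would form the load target roughly as:
static inline unsigned exampleLiteralLoadAddress(unsigned PC, int SignedImm12) {
  unsigned Base = PC & ~3u;              // Align(PC, 4), per LDR (literal)
  return Base + (unsigned)SignedImm12;   // decodeImm12() already carries the sign
}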
- - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); - - NumOpsAdded = 2; - - return true; -} - - -// A6.2.4 Load/store single data item -// -// Load/Store Register (reg|imm): tRd tRn imm5|tRm -// Load Register Signed Byte|Halfword: tRd tRn tRm -static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::tGPRRegClassID - && OpInfo[1].RegClass == ARM::tGPRRegClassID - && "Expect >= 2 operands and first two as thumb reg operands"); - - // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRn(insn)))); - OpIdx = 2; - - // We have either { imm5 } or { tRm } remaining. - // Note that STR/LDR (register) should skip the imm5 offset operand for - // t_addrmode_s[1|2|4]. - - assert(OpIdx < NumOps && "More operands expected"); - - if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && - !OpInfo[OpIdx].isOptionalDef()) { - // Table A6-5 16-bit Thumb Load/store instructions - // opA = 0b0101 for STR/LDR (register) and friends. - // Otherwise, we have STR/LDR (immediate) and friends. - assert(opA != 5 && "Immediate operand expected for this opcode"); - MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn))); - ++OpIdx; - } else { - // The next reg operand is tRm, the offset. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - ++OpIdx; - } - return true; -} - -// A6.2.4 Load/store single data item -// -// Load/Store Register SP relative: tRt ARM::SP imm8 -static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::tGPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass < 0 && - !OpInfo[2].isPredicate() && - !OpInfo[2].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 3; - return true; -} - -// Table A6-1 16-bit Thumb instruction encoding -// A8.6.10 ADR -// -// tADDrPCi: tRt imm8 -static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass < 0 && - !OpInfo[1].isPredicate() && - !OpInfo[1].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - 
MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 2; - return true; -} - -// Table A6-1 16-bit Thumb instruction encoding -// A8.6.8 ADD (SP plus immediate) -// -// tADDrSPi: tRt ARM::SP imm8 -static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::tGPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass < 0 && - !OpInfo[2].isPredicate() && - !OpInfo[2].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 3; - return true; -} - -// tPUSH, tPOP: Pred-Imm Pred-CCR register_list -// -// where register_list = low registers + [lr] for PUSH or -// low registers + [pc] for POP -// -// "low registers" is specified by Inst{7-0} -// lr|pc is specified by Inst{8} -static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); - - unsigned &OpIdx = NumOpsAdded; - - // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - OpIdx += 2; - else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - - unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) - | slice(insn, 7, 0); - - // Fill the variadic part of reglist. - for (unsigned i = 0; i < 16; ++i) { - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - i))); - ++OpIdx; - } - } - - return true; -} - -// A6.2.5 Miscellaneous 16-bit instructions -// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP. -// -// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7 -// t2IT: firstcond=Inst{7-4} mask=Inst{3-0} -// tCBNZ, tCBZ: tRd imm6*2 -// tBKPT: imm8 -// tNOP, tSEV, tYIELD, tWFE, tWFI: -// no operand (except predicate pair) -// tSETENDBE, tSETENDLE, : -// no operand -// Others: tRd tRn -static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (NumOps == 0) - return true; - - if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - // Predicate operands are handled elsewhere. - if (NumOps == 2 && - OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && - OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { - return true; - } - - if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) { - // Special case handling for tADDspi and tSUBspi. - // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate) - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn))); - NumOpsAdded = 3; - return true; - } - - if (Opcode == ARM::t2IT) { - // Special case handling for If-Then. - // A8.6.50 IT - // Tag the (firstcond[0] bit << 4) along with mask. 
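// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): for t2IT, Inst{7-4} is firstcond and Inst{3-0}
// is the mask.  The lowest set bit of the mask terminates the IT block, so the
// number of conditional instructions it covers can be recovered like this:
static inline unsigned exampleITBlockLength(unsigned Mask4) {
  unsigned Pos = 0;                       // position of the least-significant '1'
  while (Pos < 4 && ((Mask4 >> Pos) & 1) == 0)
    ++Pos;
  return 4 - Pos;                         // 0b1000 -> 1 instr, 0bxxx1 -> 4 instrs
}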
- - // firstcond - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4))); - - // firstcond[0] and mask - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); - NumOpsAdded = 2; - return true; - } - - if (Opcode == ARM::tBKPT) { - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value - NumOpsAdded = 1; - return true; - } - - // CPS has a singleton $opt operand that contains the following information: - // The first op would be 0b10 as enable and 0b11 as disable in regular ARM, - // but in Thumb it's is 0 as enable and 1 as disable. So map it to ARM's - // default one. The second get the AIF flags from Inst{2-0}. - if (Opcode == ARM::tCPS) { - MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4))); - MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0))); - NumOpsAdded = 2; - return true; - } - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) - && "Expect >=2 operands"); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRd(insn)))); - - if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { - // Two register instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRn(insn)))); - } else { - // CBNZ, CBZ - assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode"); - MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); - } - - NumOpsAdded = 2; - - return true; -} - -// A8.6.53 LDM / LDMIA -// A8.6.189 STM / STMIA -// -// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list -// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list -static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, - unsigned &NumOpsAdded, BO B) { - assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD || - Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode"); - - unsigned tRt = getT1tRt(insn); - NumOpsAdded = 0; - - // WB register, if necessary. - if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - tRt))); - ++NumOpsAdded; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - tRt))); - ++NumOpsAdded; - - // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) { - NumOpsAdded += 2; - } else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - - unsigned RegListBits = slice(insn, 7, 0); - if (BitCount(RegListBits) < 1) { - DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n"); - return false; - } - - // Fill the variadic part of reglist. 
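// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): the register list is a plain bitmask with bit
// i selecting Ri, plus bit 8 of the instruction mapped to LR (push) or PC
// (pop).  A tPOP with Inst{8} = 1 and Inst{7-0} = 0b00010110 therefore pops
// {R1, R2, R4, PC}:
static inline unsigned exampleCountListRegs(unsigned RegListBits) {
  unsigned N = 0;
  for (unsigned i = 0; i < 16; ++i)       // low registers plus LR/PC, if any
    N += (RegListBits >> i) & 1;
  return N;                               // 4 for the tPOP example above
}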
- for (unsigned i = 0; i < 8; ++i) - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - i))); - ++NumOpsAdded; - } - - return true; -} - -static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, - B); -} - -static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, - B); -} - -// A8.6.16 B Encoding T1 -// cond = Inst{11-8} & imm8 = Inst{7-0} -// imm32 = SignExtend(imm8:'0', 32) -// -// tBcc: offset Pred-Imm Pred-CCR -// tSVC: imm8 Pred-Imm Pred-CCR -// tTRAP: 0 operand (early return) -static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - if (Opcode == ARM::tTRAP) - return true; - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps == 3 && OpInfo[0].RegClass < 0 && - OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID - && "Exactly 3 operands expected"); - - unsigned Imm8 = getT1Imm8(insn); - MI.addOperand(MCOperand::CreateImm( - Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1) - : (int)Imm8)); - - // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). - // But note that for tBcc, if cond = '1110' then UNDEFINED. - if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) { - DEBUG(errs() << "if cond = '1110' then UNDEFINED\n"); - return false; - } - NumOpsAdded = 1; - - return true; -} - -// A8.6.16 B Encoding T2 -// imm11 = Inst{10-0} -// imm32 = SignExtend(imm11:'0', 32) -// -// tB: offset -static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); - - unsigned Imm11 = getT1Imm11(insn); - - MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1))); - - NumOpsAdded = 1; - - return true; - -} - -// See A6.2 16-bit Thumb instruction encoding for instruction classes -// corresponding to op. 
-// -// Table A6-1 16-bit Thumb instruction encoding (abridged) -// op Instruction or instruction class -// ------ -------------------------------------------------------------------- -// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7 -// 010000 Data-processing on page A6-8 -// 010001 Special data instructions and branch and exchange on page A6-9 -// 01001x Load from Literal Pool, see LDR (literal) on page A8-122 -// 0101xx Load/store single data item on page A6-10 -// 011xxx -// 100xxx -// 10100x Generate PC-relative address, see ADR on page A8-32 -// 10101x Generate SP-relative address, see ADD (SP plus immediate) on -// page A8-28 -// 1011xx Miscellaneous 16-bit instructions on page A6-11 -// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374 -// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a -// 1101xx Conditional branch, and Supervisor Call on page A6-13 -// 11100x Unconditional Branch, see B on page A8-44 -// -static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - unsigned op1 = slice(op, 5, 4); - unsigned op2 = slice(op, 3, 2); - unsigned op3 = slice(op, 1, 0); - unsigned opA = slice(op, 5, 2); - switch (op1) { - case 0: - // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 1: - switch (op2) { - case 0: - switch (op3) { - case 0: - // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 1: - // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - default: - // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - break; - default: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, - B); - break; - } - break; - case 2: - switch (op2) { - case 0: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, - B); - case 1: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 2: - if (op3 <= 1) { - // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } else { - // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - default: - // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - break; - case 3: - switch (op2) { - case 0: - if (op3 <= 1) { - // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - case 1: - // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 2: - // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); - default: - assert(0 && "Unreachable code"); - break; - } - break; - default: - assert(0 && "Unreachable code"); - break; - } - - return false; -} - -/////////////////////////////////////////////// -// // -// Thumb2 instruction disassembly functions. 
// -// // -/////////////////////////////////////////////// - -/////////////////////////////////////////////////////////// -// // -// Note: the register naming follows the ARM convention! // -// // -/////////////////////////////////////////////////////////// - -static inline bool Thumb2SRSOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2SRSDBW: case ARM::t2SRSDB: - case ARM::t2SRSIAW: case ARM::t2SRSIA: - return true; - } -} - -static inline bool Thumb2RFEOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2RFEDBW: case ARM::t2RFEDB: - case ARM::t2RFEIAW: case ARM::t2RFEIA: - return true; - } -} - -// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0} -static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); - NumOpsAdded = 1; - return true; -} - -// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn -static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - unsigned Rn = decodeRn(insn); - if (Rn == 15) { - DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); - return false; - } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn))); - NumOpsAdded = 1; - return true; -} - -static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (Thumb2SRSOpcode(Opcode)) - return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); - - if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD || - Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD || - Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || - Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) - && "Unexpected opcode"); - assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4"); - - NumOpsAdded = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base. - if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD || - Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - } - - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - - // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) { - NumOpsAdded += 2; - } else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - - unsigned RegListBits = insn & ((1 << 16) - 1); - - // Fill the variadic part of reglist. 
- for (unsigned i = 0; i < 16; ++i) - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - i))); - ++NumOpsAdded; - } - - return true; -} - -// t2LDREX: Rd Rn -// t2LDREXD: Rd Rs Rn -// t2LDREXB, t2LDREXH: Rd Rn -// t2STREX: Rs Rd Rn -// t2STREXD: Rm Rd Rs Rn -// t2STREXB, t2STREXH: Rm Rd Rn -static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass > 0 - && OpInfo[1].RegClass > 0 - && "Expect >=2 operands and first two as reg operands"); - - bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH); - bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX); - bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD); - - unsigned Rt = decodeRd(insn); - unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX. - unsigned Rd = decodeRm(insn); - unsigned Rn = decodeRn(insn); - - // Some sanity checking first. - if (isStore) { - // if d == n || d == t then UNPREDICTABLE - // if d == n || d == t || d == t2 then UNPREDICTABLE - if (isDW) { - if (Rd == Rn || Rd == Rt || Rd == Rt2) { - DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n"); - return false; - } - } else { - if (isSW) { - if (Rt2 == Rn || Rt2 == Rt) { - DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); - return false; - } - } else { - if (Rd == Rn || Rd == Rt) { - DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); - return false; - } - } - } - } else { - // Load - // A8.6.71 LDREXD - // if t == t2 then UNPREDICTABLE - if (isDW && Rt == Rt2) { - DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); - return false; - } - } - - // Add the destination operand for store. - if (isStore) { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[OpIdx].RegClass, - isSW ? Rt2 : Rd))); - ++OpIdx; - } - - // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - Rt))); - ++OpIdx; - - // Thumb2 doubleword complication: with an extra source/destination operand. - if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, - Rt2))); - ++OpIdx; - } - - // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - Rn))); - ++OpIdx; - - return true; -} - -// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode) -// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version) -// t2STRDi8: Rd Rs Rn imm8s4 (offset mode) -// -// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for -// disassembly only and do not have a tied_to writeback base register operand. -static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 4 - && OpInfo[0].RegClass > 0 - && OpInfo[0].RegClass == OpInfo[1].RegClass - && OpInfo[2].RegClass > 0 - && OpInfo[3].RegClass < 0 - && "Expect >= 4 operands and first 3 as reg operands"); - - // Thumnb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1). 
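// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): in the dual forms the (+/-)imm8 is scaled by
// four and the two registers transfer consecutive words, i.e.
//   address = Rn +/- (imm8 * 4);   Rt <-> [address];   Rt2 <-> [address + 4]
static inline int exampleDualOffset(bool Add, unsigned Imm8) {
  int Off = (int)(Imm8 * 4);      // imm32 = ZeroExtend(imm8:'00', 32)
  return Add ? Off : -Off;        // Add comes from the U bit, Inst{23}
}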
- unsigned Rt = decodeRd(insn); - unsigned Rt2 = decodeRs(insn); - unsigned Rn = decodeRn(insn); - - // Some sanity checking first. - - // A8.6.67 LDRD (literal) has its W bit as (0). - if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) { - if (Rn == 15 && slice(insn, 21, 21) != 0) - return false; - } else { - // For Dual Store, PC cannot be used as the base register. - if (Rn == 15) { - DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); - return false; - } - } - if (Rt == Rt2) { - DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); - return false; - } - if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) { - if (Rn == Rt || Rn == Rt2) { - DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n"); - return false; - } - } - - // Add the <Rt> <Rt2> operands. - unsigned RegClassPair = OpInfo[0].RegClass; - unsigned RegClassBase = OpInfo[2].RegClass; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, - decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase, - decodeRn(insn)))); - - // Finally add (+/-)imm8*4, depending on the U bit. - int Offset = getImm8(insn) * 4; - if (getUBit(insn) == 0) - Offset = -Offset; - MI.addOperand(MCOperand::CreateImm(Offset)); - NumOpsAdded = 4; - - return true; -} - -// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR -static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "Expect >= 2 operands"); - - // The generic version of TBB/TBH needs a base register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - // Add the index register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 2; - - return true; -} - -static inline bool Thumb2ShiftOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2MOVCClsl: case ARM::t2MOVCClsr: - case ARM::t2MOVCCasr: case ARM::t2MOVCCror: - case ARM::t2LSLri: case ARM::t2LSRri: - case ARM::t2ASRri: case ARM::t2RORri: - return true; - } -} - -// A6.3.11 Data-processing (shifted register) -// -// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm -// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm -// Three register operands: Rs Rn Rm -// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm -// -// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 -// register with shift forms: (Rm, ConstantShiftSpecifier). -// Constant shift specifier: Imm = (ShOp | ShAmt<<3). -// -// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which -// only require two register operands: Rd, Rm in ARM Reference Manual terms, and -// nothing else, because the shift amount is already specified. -// Similar case holds for t2MOVrx, t2ADDrr, ..., etc. -static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - // Special case handling. 
- if (Opcode == ARM::t2BR_JT) { - assert(NumOps == 4 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass < 0 - && OpInfo[3].RegClass < 0 - && "Exactly 4 operands expect and first two as reg operands"); - // Only need to populate the src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(0)); - MI.addOperand(MCOperand::CreateImm(0)); - MI.addOperand(MCOperand::CreateImm(0)); - NumOpsAdded = 4; - return true; - } - - OpIdx = 0; - - assert(NumOps >= 2 - && (OpInfo[0].RegClass == ARM::GPRRegClassID || - OpInfo[0].RegClass == ARM::rGPRRegClassID) - && (OpInfo[1].RegClass == ARM::GPRRegClassID || - OpInfo[1].RegClass == ARM::rGPRRegClassID) - && "Expect >= 2 operands and first two as reg operands"); - - bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID || - OpInfo[2].RegClass == ARM::rGPRRegClassID)); - bool NoDstReg = (decodeRs(insn) == 0xF); - - // Build the register operands, followed by the constant shift specifier. - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[0].RegClass, - NoDstReg ? decodeRn(insn) : decodeRs(insn)))); - ++OpIdx; - - if (ThreeReg) { - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // Process tied_to operand constraint. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } else if (!NoDstReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass, - decodeRn(insn)))); - ++OpIdx; - } else { - DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n"); - return false; - } - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeRm(insn)))); - ++OpIdx; - - if (NumOps == OpIdx) - return true; - - if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef()) { - - if (Thumb2ShiftOpcode(Opcode)) { - unsigned Imm = getShiftAmtBits(insn); - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4)); - getImmShiftSE(ShOp, Imm); - MI.addOperand(MCOperand::CreateImm(Imm)); - } else { - // Build the constant shift specifier operand. 
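// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): the constant shift specifier built next
// follows the usual DecodeImmShift mapping, where an immediate field of zero
// selects the "#32" forms of LSR/ASR and RRX instead of ROR:
//   type 00 -> LSL #imm5
//   type 01 -> LSR #imm5   (imm5 == 0 means LSR #32)
//   type 10 -> ASR #imm5   (imm5 == 0 means ASR #32)
//   type 11 -> ROR #imm5   (imm5 == 0 means RRX, rotate by one with carry)
static inline unsigned exampleImmShiftAmount(unsigned Type, unsigned Imm5) {
  if (Imm5 != 0) return Imm5;
  if (Type == 1 || Type == 2) return 32;  // LSR/ASR #32 are encoded as 0
  if (Type == 3) return 1;                // RRX always rotates by one bit
  return 0;                               // LSL #0 really is a zero shift
}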
- unsigned bits2 = getShiftTypeBits(insn); - unsigned imm5 = getShiftAmtBits(insn); - ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift; - unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); - } - ++OpIdx; - } - - return true; -} - -// A6.3.1 Data-processing (modified immediate) -// -// Two register operands: Rs Rn ModImm -// One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm -// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - -// {t2MOVi, t2MVNi} -// -// ModImm = ThumbExpandImm(i:imm3:imm8) -static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RdRegClassID = OpInfo[0].RegClass; - assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || - RdRegClassID == ARM::rGPRRegClassID) - && "Expect >= 2 operands and first one as reg operand"); - - unsigned RnRegClassID = OpInfo[1].RegClass; - bool TwoReg = (RnRegClassID == ARM::GPRRegClassID - || RnRegClassID == ARM::rGPRRegClassID); - bool NoDstReg = (decodeRs(insn) == 0xF); - - // Build the register operands, followed by the modified immediate. - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RdRegClassID, - NoDstReg ? decodeRn(insn) : decodeRs(insn)))); - ++OpIdx; - - if (TwoReg) { - if (NoDstReg) { - DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); - return false; - } - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // The reg operand is tied to the first reg operand. - MI.addOperand(MI.getOperand(Idx)); - } else { - // Add second reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); - } - ++OpIdx; - } - - // The modified immediate operand should come next. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && - !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - - // i:imm3:imm8 - // A6.3.2 Modified immediate constants in Thumb instructions - unsigned imm12 = getIImm3Imm8(insn); - MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12))); - ++OpIdx; - - return true; -} - -static inline bool Thumb2SaturateOpcode(unsigned Opcode) { - switch (Opcode) { - case ARM::t2SSAT: case ARM::t2SSAT16: - case ARM::t2USAT: case ARM::t2USAT16: - return true; - default: - return false; - } -} - -/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions: -/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt -/// o t2SSAT16, t2USAT16: Rs sat_pos Rn -static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned &NumOpsAdded, BO B) { - const MCInstrDesc &MCID = ARMInsts[Opcode]; - NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands - - // Disassemble the register def. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRs(insn)))); - - unsigned Pos = slice(insn, 4, 0); - if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16) - Pos += 1; - MI.addOperand(MCOperand::CreateImm(Pos)); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRn(insn)))); - - if (NumOpsAdded == 4) { - ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ? - ARM_AM::asr : ARM_AM::lsl); - // Inst{14-12:7-6} encodes the imm5 shift amount. 
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); - if (ShAmt == 0) { - if (Opc == ARM_AM::asr) - ShAmt = 32; - else - Opc = ARM_AM::no_shift; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); - } - return true; -} - -// A6.3.3 Data-processing (plain binary immediate) -// -// o t2ADDri12, t2SUBri12: Rs Rn imm12 -// o t2LEApcrel (ADR): Rs imm12 -// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm -// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm -// o t2MOVi16: Rs imm16 -// o t2MOVTi16: Rs imm16 -// o t2SBFX (SBFX): Rs Rn lsb width -// o t2UBFX (UBFX): Rs Rn lsb width -// o t2BFI (BFI): Rs Rn lsb width -static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RdRegClassID = OpInfo[0].RegClass; - assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || - RdRegClassID == ARM::rGPRRegClassID) - && "Expect >= 2 operands and first one as reg operand"); - - unsigned RnRegClassID = OpInfo[1].RegClass; - bool TwoReg = (RnRegClassID == ARM::GPRRegClassID - || RnRegClassID == ARM::rGPRRegClassID); - - // Build the register operand(s), followed by the immediate(s). - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID, - decodeRs(insn)))); - ++OpIdx; - - if (TwoReg) { - assert(NumOps >= 3 && "Expect >= 3 operands"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // Process tied_to operand constraint. - MI.addOperand(MI.getOperand(Idx)); - } else { - // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); - } - ++OpIdx; - } - - if (Opcode == ARM::t2BFI) { - // Add val reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - - // Pre-increment OpIdx. 
- ++OpIdx; - - if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 - || Opcode == ARM::t2LEApcrel) - MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { - if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { - uint32_t mask = 0; - if (getBitfieldInvMask(insn, mask)) - MI.addOperand(MCOperand::CreateImm(mask)); - else - return false; - } else { - // Handle the case of: lsb width - assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX) - && "Unexpected opcode"); - MI.addOperand(MCOperand::CreateImm(getLsb(insn))); - MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); - - ++OpIdx; - } - - return true; -} - -// A6.3.4 Table A6-15 Miscellaneous control instructions -// A8.6.41 DMB -// A8.6.42 DSB -// A8.6.49 ISB -static inline bool t2MiscCtrlInstr(uint32_t insn) { - if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 && - slice(insn, 12, 12) == 0) - return true; - - return false; -} - -// A6.3.4 Branches and miscellaneous control -// -// A8.6.16 B -// Branches: t2B, t2Bcc -> imm operand -// -// Branches: t2TPsoft -> no operand -// -// A8.6.23 BL, BLX (immediate) -// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand -// -// A8.6.26 -// t2BXJ -> Rn -// -// Miscellaneous control: -// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) -// -// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV -// -> no operand (except pred-imm pred-ccr) -// -// t2DBG -> imm4 = Inst{3-0} -// -// t2MRS/t2MRSsys -> Rs -// t2MSR/t2MSRsys -> Rn mask=Inst{11-8} -// t2SMC -> imm4 = Inst{19-16} -static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (NumOps == 0) - return true; - - if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) { - // Inst{3-0} encodes the memory barrier option for the variants. - unsigned opt = slice(insn, 3, 0); - switch (opt) { - case ARM_MB::SY: case ARM_MB::ST: - case ARM_MB::ISH: case ARM_MB::ISHST: - case ARM_MB::NSH: case ARM_MB::NSHST: - case ARM_MB::OSH: case ARM_MB::OSHST: - MI.addOperand(MCOperand::CreateImm(opt)); - NumOpsAdded = 1; - return true; - default: - return false; - } - } - - if (t2MiscCtrlInstr(insn)) - return true; - - switch (Opcode) { - case ARM::t2CLREX: - case ARM::t2NOP: - case ARM::t2YIELD: - case ARM::t2WFE: - case ARM::t2WFI: - case ARM::t2SEV: - return true; - default: - break; - } - - // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different - // opcodes which match the same real instruction. This is needed since there's - // no current handling of optional arguments. Fix here when a better handling - // of optional arguments is implemented. - if (Opcode == ARM::t2CPS3p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 3; - return true; - } - if (Opcode == ARM::t2CPS2p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::t2CPS1p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 1; - return true; - } - - // DBG has its option specified in Inst{3-0}. 
- if (Opcode == ARM::t2DBG) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; - } - - // MRS and MRSsys take one GPR reg Rs. - if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); - NumOpsAdded = 1; - return true; - } - // BXJ takes one GPR reg Rn. - if (Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - NumOpsAdded = 1; - return true; - } - // MSR take a mask, followed by one GPR reg Rn. The mask contains the R Bit in - // bit 4, and the special register fields in bits 3-0. - if (Opcode == ARM::t2MSR) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ | - slice(insn, 11, 8) /* Special Reg */)); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - NumOpsAdded = 2; - return true; - } - // SMC take imm4. - if (Opcode == ARM::t2SMC) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); - NumOpsAdded = 1; - return true; - } - - // Some instructions have predicate operands first before the immediate. - if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) { - // Handling the two predicate operands before the imm operand. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - NumOpsAdded += 2; - else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - } - - // Add the imm operand. - int Offset = 0; - - switch (Opcode) { - default: - assert(0 && "Unexpected opcode"); - return false; - case ARM::t2B: - Offset = decodeImm32_B_EncodingT4(insn); - break; - case ARM::t2Bcc: - Offset = decodeImm32_B_EncodingT3(insn); - break; - case ARM::tBLr9: - Offset = decodeImm32_BL(insn); - break; - case ARM::tBLXi_r9: - Offset = decodeImm32_BLX(insn); - break; - } - - if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) - MI.addOperand(MCOperand::CreateImm(Offset)); - - // This is an increment as some predicate operands may have been added first. - NumOpsAdded += 1; - - return true; -} - -static inline bool Thumb2PreloadOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2PLDi12: case ARM::t2PLDi8: - case ARM::t2PLDs: - case ARM::t2PLDWi12: case ARM::t2PLDWi8: - case ARM::t2PLDWs: - case ARM::t2PLIi12: case ARM::t2PLIi8: - case ARM::t2PLIs: - return true; - } -} - -static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // Preload Data/Instruction requires either 2 or 3 operands. - // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8 - // t2PLDr: Rn Rm - // t2PLDs: Rn Rm imm2=Inst{5-4} - // Same pattern applies for t2PLDW* and t2PLI*. 
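// Illustrative sketch (editor's assumption; the helper below is invented, not
// part of the removed sources): the preload addressing forms handled by this
// routine reduce to three shapes,
//   [Rn, #+/-imm12]        t2PLD*i12 / t2PLIi12 (signed, via decodeImm12)
//   [Rn, #-imm8]           t2PLD*i8  / t2PLIi8  (the 8-bit form is subtract-only)
//   [Rn, Rm, lsl #imm2]    t2PLD*s   / t2PLIs   (imm2 = Inst{5-4})
static inline int examplePreloadI8Offset(unsigned Imm8) {
  return -(int)Imm8;    // add = FALSE for the imm8 encodings, hence the negation
}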
- - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - "Expect >= 2 operands and first one as reg operand"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - } else { - assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - int Offset = 0; - if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || - Opcode == ARM::t2PLIi8) { - // A8.6.117 Encoding T2: add = FALSE - unsigned Imm8 = getImm8(insn); - Offset = -1 * Imm8; - } else { - // The i12 forms. See, for example, A8.6.117 Encoding T1. - // Note that currently t2PLDi12 also handles the previously named t2PLDpci - // opcode, that's why we use decodeImm12(insn) which returns +/- imm12. - Offset = decodeImm12(insn); - } - MI.addOperand(MCOperand::CreateImm(Offset)); - } - ++OpIdx; - - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && - !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs. - MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4))); - ++OpIdx; - } - - return true; -} - -static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load, - unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) { - - // Inst{22-21} encodes the data item transferred for load/store. - // For single word, it is encoded as ob10. - bool Word = (slice(insn, 22, 21) == 2); - bool Half = (slice(insn, 22, 21) == 1); - bool Byte = (slice(insn, 22, 21) == 0); - - if (UseRm && BadReg(R2)) { - DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n"); - return true; - } - - if (Load) { - if (!Word && R0 == 13) { - DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n"); - return true; - } - if (Byte) { - if (WB && R0 == 15 && slice(insn, 10, 8) == 3) { - // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0) - DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); - return true; - } - } - // A6.3.8 Load halfword, memory hints - if (Half) { - if (WB) { - if (R0 == R1) { - // A8.6.82 LDRSH (immediate) Encoding T2 - DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n"); - return true; - } - if (R0 == 15 && slice(insn, 10, 8) == 3) { - // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0) - DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); - return true; - } - } else { - if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) { - if (R0 == 15 && slice(insn, 10, 8) == 4) { - // A8.6.82 LDRSH (immediate) Encoding T2 - DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE" - << " \"Unallocated memory hints\"\n"); - return true; - } - } else { - if (R0 == 15) { - // A8.6.82 LDRSH (immediate) Encoding T1 - DEBUG(errs() << "if Rt == '1111' then SEE" - << " \"Unallocated memory hints\"\n"); - return true; - } - } - } - } - } else { - if (WB && R0 == R1) { - DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); - return true; - } - if ((WB && R0 == 15) || (!WB && R1 == 15)) { - DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n"); - return true; - } - if (Word) { - if ((WB && R1 == 15) || (!WB && R0 == 15)) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - } else { 
- if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) { - DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n"); - return true; - } - } - } - return false; -} - -// A6.3.10 Store single data item -// A6.3.9 Load byte, memory hints -// A6.3.8 Load halfword, memory hints -// A6.3.7 Load word -// -// For example, -// -// t2LDRi12: Rd Rn (+)imm12 -// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also -// DisassembleThumb2DPSoReg) -// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// -// t2STRi12: Rd Rn (+)imm12 -// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also -// DisassembleThumb2DPSoReg) -// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// -// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated -// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first -// operand is Rn; for all the other instructions, Rd is the first operand. -// -// Delegates to DisassembleThumb2PreLoad() for preload data/instruction. -// Delegates to DisassembleThumb2Ldpci() for load * literal operations. -static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - unsigned Rn = decodeRn(insn); - - if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - // See, for example, A6.3.7 Load word: Table A6-18 Load word. - if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 3 && - OpInfo[0].RegClass > 0 && - OpInfo[1].RegClass > 0 && - "Expect >= 3 operands and first two as reg operands"); - - bool ThreeReg = (OpInfo[2].RegClass > 0); - bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1; - bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td - - // Build the register operands, followed by the immediate. - unsigned R0 = 0, R1 = 0, R2 = 0; - unsigned Rd = decodeRd(insn); - int Imm = 0; - - if (!Load && TIED_TO) { - R0 = Rn; - R1 = Rd; - } else { - R0 = Rd; - R1 = Rn; - } - if (ThreeReg) { - if (TIED_TO) { - R2 = Rn; - Imm = decodeImm8(insn); - } else { - R2 = decodeRm(insn); - // See, for example, A8.6.64 LDRB (register). - // And ARMAsmPrinter::printT2AddrModeSoRegOperand(). - // LSL is the default shift opc, and LLVM does not expect it to be encoded - // as part of the immediate operand. - // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4)); - Imm = slice(insn, 5, 4); - } - } else { - if (Imm12) - Imm = getImm12(insn); - else - Imm = decodeImm8(insn); - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - R0))); - ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - R1))); - ++OpIdx; - - if (ThreeReg) { - // This could be an offset register or a TIED_TO register. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, - R2))); - ++OpIdx; - } - - if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO, - TIED_TO)) - return false; - - assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - - return true; -} - -// A6.3.12 Data-processing (register) -// -// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))] -// Three register operands only: Rs Rn Rm -// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))] -// -// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm -// -// Miscellaneous operations: Rs [Rn] Rm -static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && - OpInfo[0].RegClass > 0 && - OpInfo[1].RegClass > 0 && - "Expect >= 2 operands and first two as reg operands"); - - // Build the register operands, followed by the optional rotation amount. - - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeRs(insn)))); - ++OpIdx; - - if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, - decodeRn(insn)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeRm(insn)))); - ++OpIdx; - - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Add the rotation amount immediate. - MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); - ++OpIdx; - } - - return true; -} - -// A6.3.16 Multiply, multiply accumulate, and absolute difference -// -// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12} -// t2MUL, t2SMMUL: Rs Rn Rm -// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12} -// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm -// -// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]: -// Rs Rn Rm Ra=Inst{15-12} -// -// Unsigned Sum of Absolute Differences [and Accumulate] -// Rs Rn Rm [Ra=Inst{15-12}] -static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && - OpInfo[2].RegClass == ARM::rGPRRegClassID && - "Expect >= 3 operands and first three as reg operands"); - - // Build the register operands. - - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRs(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRn(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRm(insn)))); - - if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRd(insn)))); - - NumOpsAdded = FourReg ? 
4 : 3; - - return true; -} - -// A6.3.17 Long multiply, long multiply accumulate, and divide -// -// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm -// where RdLo = Inst{15-12} and RdHi = Inst{11-8} -// -// Halfword multiple accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm -// where RdLo = Inst{15-12} and RdHi = Inst{11-8} -// -// Dual halfword multiple: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm -// where RdLo = Inst{15-12} and RdHi = Inst{11-8} -// -// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm -static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && - OpInfo[2].RegClass == ARM::rGPRRegClassID && - "Expect >= 3 operands and first three as reg operands"); - - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - - // Build the register operands. - - if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRs(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRn(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRm(insn)))); - - if (FourReg) - NumOpsAdded = 4; - else - NumOpsAdded = 3; - - return true; -} - -// See A6.3 32-bit Thumb instruction encoding for instruction classes -// corresponding to (op1, op2, op). -// -// Table A6-9 32-bit Thumb instruction encoding -// op1 op2 op Instruction class, see -// --- ------- -- ----------------------------------------------------------- -// 01 00xx0xx - Load/store multiple on page A6-23 -// 00xx1xx - Load/store dual, load/store exclusive, table branch on -// page A6-24 -// 01xxxxx - Data-processing (shifted register) on page A6-31 -// 1xxxxxx - Coprocessor instructions on page A6-40 -// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15 -// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19 -// - 1 Branches and miscellaneous control on page A6-20 -// 11 000xxx0 - Store single data item on page A6-30 -// 001xxx0 - Advanced SIMD element or structure load/store instructions -// on page A7-27 -// 00xx001 - Load byte, memory hints on page A6-28 -// 00xx011 - Load halfword, memory hints on page A6-26 -// 00xx101 - Load word on page A6-25 -// 00xx111 - UNDEFINED -// 010xxxx - Data-processing (register) on page A6-33 -// 0110xxx - Multiply, multiply accumulate, and absolute difference on -// page A6-38 -// 0111xxx - Long multiply, long multiply accumulate, and divide on -// page A6-39 -// 1xxxxxx - Coprocessor instructions on page A6-40 -// -static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, - unsigned &NumOpsAdded, BO B) { - - switch (op1) { - case 1: - if (slice(op2, 6, 5) == 0) { - if (slice(op2, 2, 2) == 0) { - // Load/store multiple. - return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - - // Load/store dual, load/store exclusive, table branch, otherwise. - assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!"); - if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || - (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { - // Load/store exclusive. 
- return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - if (Opcode == ARM::t2LDRDi8 || - Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || - Opcode == ARM::t2STRDi8 || - Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { - // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) { - // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - } else if (slice(op2, 6, 5) == 1) { - // Data-processing (shifted register). - return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - - // FIXME: A6.3.18 Coprocessor instructions - // But see ThumbDisassembler::getInstruction(). - - break; - case 2: - if (op == 0) { - if (slice(op2, 5, 5) == 0) - // Data-processing (modified immediate) - return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - if (Thumb2SaturateOpcode(Opcode)) - return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B); - - // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - case 3: - switch (slice(op2, 6, 5)) { - case 0: - // Load/store instructions... - if (slice(op2, 0, 0) == 0) { - if (slice(op2, 4, 4) == 0) { - // Store single data item on page A6-30 - return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded, - B); - } else { - // FIXME: Advanced SIMD element or structure load/store instructions. - // But see ThumbDisassembler::getInstruction(). - ; - } - } else { - // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps, - NumOpsAdded, B); - } - break; - case 1: - if (slice(op2, 4, 4) == 0) { - // A6.3.12 Data-processing (register) - return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else if (slice(op2, 3, 3) == 0) { - // A6.3.16 Multiply, multiply accumulate, and absolute difference - return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // A6.3.17 Long multiply, long multiply accumulate, and divide - return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - break; - default: - // FIXME: A6.3.18 Coprocessor instructions - // But see ThumbDisassembler::getInstruction(). 
- ; - break; - } - - break; - default: - assert(0 && "Thumb2 encoding error!"); - break; - } - - return false; -} - -static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { - - uint16_t HalfWord = slice(insn, 31, 16); - - if (HalfWord == 0) { - // A6.2 16-bit Thumb instruction encoding - // op = bits[15:10] - uint16_t op = slice(insn, 15, 10); - return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); - } - - unsigned bits15_11 = slice(HalfWord, 15, 11); - - // A6.1 Thumb instruction set encoding - if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) { - assert("Bits[15:11] first halfword of Thumb2 instruction is out of range"); - return false; - } - - // A6.3 32-bit Thumb instruction encoding - - uint16_t op1 = slice(HalfWord, 12, 11); - uint16_t op2 = slice(HalfWord, 10, 4); - uint16_t op = slice(insn, 15, 15); - - return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); -} diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 78d3e47..ccdac3e 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "ARMBaseInfo.h" #include "ARMInstPrinter.h" -#include "ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -25,6 +25,23 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "ARMGenAsmWriter.inc" +/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. +/// +/// getSORegOffset returns an integer from 0-31, representing '32' as 0. +static unsigned translateShiftImm(unsigned imm) { + if (imm == 0) + return 32; + return imm; +} + + +ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } @@ -33,11 +50,12 @@ void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { unsigned Opcode = MI->getOpcode(); // Check for MOVs and print canonical forms, instead. - if (Opcode == ARM::MOVs) { + if (Opcode == ARM::MOVsr) { // FIXME: Thumb variants? const MCOperand &Dst = MI->getOperand(0); const MCOperand &MO1 = MI->getOperand(1); @@ -51,20 +69,36 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << '\t' << getRegisterName(Dst.getReg()) << ", " << getRegisterName(MO1.getReg()); - if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) - return; + O << ", " << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + printAnnotation(O, Annot); + return; + } - O << ", "; + if (Opcode == ARM::MOVsi) { + // FIXME: Thumb variants? 
+ const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())); + printSBitModifierOperand(MI, 5, O); + printPredicateOperand(MI, 3, O); - if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) { + printAnnotation(O, Annot); + return; } + + O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + printAnnotation(O, Annot); return; } + // A8.6.123 PUSH if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && MI->getOperand(0).getReg() == ARM::SP) { @@ -74,6 +108,15 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ".w"; O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } + if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(3).getImm() == -4) { + O << '\t' << "push"; + printPredicateOperand(MI, 4, O); + O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}"; + printAnnotation(O, Annot); return; } @@ -86,8 +129,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ".w"; O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); return; } + if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(4).getImm() == 4) { + O << '\t' << "pop"; + printPredicateOperand(MI, 5, O); + O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}"; + printAnnotation(O, Annot); + return; + } + // A8.6.355 VPUSH if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) && @@ -96,6 +149,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printPredicateOperand(MI, 2, O); O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); return; } @@ -106,10 +160,40 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printPredicateOperand(MI, 2, O); O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } + + if (Opcode == ARM::tLDMIA) { + bool Writeback = true; + unsigned BaseReg = MI->getOperand(0).getReg(); + for (unsigned i = 3; i < MI->getNumOperands(); ++i) { + if (MI->getOperand(i).getReg() == BaseReg) + Writeback = false; + } + + O << "\tldm"; + + printPredicateOperand(MI, 1, O); + O << '\t' << getRegisterName(BaseReg); + if (Writeback) O << "!"; + O << ", "; + printRegisterList(MI, 3, O); + printAnnotation(O, Annot); + return; + } + + // Thumb1 NOP + if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 && + MI->getOperand(1).getReg() == ARM::R8) { + O << "\tnop"; + printPredicateOperand(MI, 2, O); + printAnnotation(O, Annot); return; } printInstruction(MI, O); + printAnnotation(O, Annot); } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -122,16 +206,38 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. 
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } } } +void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + if (MO1.isExpr()) + O << *MO1.getExpr(); + else if (MO1.isImm()) + O << "[pc, #" << MO1.getImm() << "]"; + else + llvm_unreachable("Unknown LDR label operand?"); +} + // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 // REG REG 0,SH_OPC - e.g. R5, ROR R3 // REG 0 IMM,SH_OPC - e.g. R5, LSL #3 -void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, +void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); @@ -144,14 +250,27 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc == ARM_AM::rrx) return; - if (MO2.getReg()) { - O << ' ' << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else if (ShOpc != ARM_AM::rrx) { - O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); - } + + O << ' ' << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); +} + +void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << getRegisterName(MO1.getReg()); + + // Print the shift opc. 
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc == ARM_AM::rrx) + return; + O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); } + //===--------------------------------------------------------------------===// // Addressing Mode #2 //===--------------------------------------------------------------------===// @@ -209,6 +328,22 @@ void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, << " #" << ShImm; } +void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()) << ", " + << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()) << ", " + << getRegisterName(MO2.getReg()) << ", lsl #1]"; +} + void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); @@ -284,7 +419,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, O << '[' << getRegisterName(MO1.getReg()); if (MO2.getReg()) { - O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) + O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << getRegisterName(MO2.getReg()) << ']'; return; } @@ -315,8 +450,8 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); if (MO1.getReg()) { - O << (char)ARM_AM::getAM3Op(MO2.getImm()) - << getRegisterName(MO1.getReg()); + O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); return; } @@ -326,6 +461,31 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, << ImmOffs; } +void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + unsigned Imm = MO.getImm(); + O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff); +} + +void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg()); +} + +void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + unsigned Imm = MO.getImm(); + O << '#' << ((Imm & 256) ? 
"" : "-") << ((Imm & 0xff) << 2); +} + + void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) @@ -345,7 +505,9 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); + unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); + if (ImmOffs || Op == ARM_AM::sub) { O << ", #" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs * 4; @@ -402,20 +564,31 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - switch (Opc) { - case ARM_AM::no_shift: + bool isASR = (ShiftOp & (1 << 5)) != 0; + unsigned Amt = ShiftOp & 0x1f; + if (isASR) + O << ", asr #" << (Amt == 0 ? 32 : Amt); + else if (Amt) + O << ", lsl #" << Amt; +} + +void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + if (Imm == 0) return; - case ARM_AM::lsl: - O << ", lsl #"; - break; - case ARM_AM::asr: - O << ", asr #"; - break; - default: - assert(0 && "unexpected shift opcode for shift immediate operand"); - } - O << ARM_AM::getSORegOffset(ShiftOp); + assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!"); + O << ", lsl #" << Imm; +} + +void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + // A shift amount of 32 is encoded as 0. + if (Imm == 0) + Imm = 32; + assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!"); + O << ", asr #" << Imm; } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, @@ -450,6 +623,9 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, for (int i=2; i >= 0; --i) if (IFlags & (1 << i)) O << ARM_PROC::IFlagsToString(1 << i); + + if (IFlags == 0) + O << "none"; } void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, @@ -458,10 +634,43 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; + if (getAvailableFeatures() & ARM::FeatureMClass) { + switch (Op.getImm()) { + default: assert(0 && "Unexpected mask value!"); + case 0: O << "apsr"; return; + case 1: O << "iapsr"; return; + case 2: O << "eapsr"; return; + case 3: O << "xpsr"; return; + case 5: O << "ipsr"; return; + case 6: O << "epsr"; return; + case 7: O << "iepsr"; return; + case 8: O << "msp"; return; + case 9: O << "psp"; return; + case 16: O << "primask"; return; + case 17: O << "basepri"; return; + case 18: O << "basepri_max"; return; + case 19: O << "faultmask"; return; + case 20: O << "control"; return; + } + } + + // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as + // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively. 
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { + O << "APSR_"; + switch (Mask) { + default: assert(0); + case 4: O << "g"; return; + case 8: O << "nzcvq"; return; + case 12: O << "nzcvqg"; return; + } + llvm_unreachable("Unexpected mask value!"); + } + if (SpecRegRBit) - O << "spsr"; + O << "SPSR"; else - O << "cpsr"; + O << "CPSR"; if (Mask) { O << '_'; @@ -501,15 +710,20 @@ void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + raw_ostream &O) { O << "p" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + raw_ostream &O) { O << "c" << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "{" << MI->getOperand(OpNum).getImm() << "}"; +} + void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O) { llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); @@ -517,7 +731,13 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << MI->getOperand(OpNum).getImm() * 4; + O << "#" << MI->getOperand(OpNum).getImm() * 4; +} + +void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + O << "#" << (Imm == 0 ? 32 : Imm); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, @@ -610,7 +830,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc != ARM_AM::rrx) - O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); + O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); } void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, @@ -647,7 +867,9 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, int32_t OffImm = (int32_t)MO2.getImm(); // Don't print +0. - if (OffImm < 0) + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) O << ", #" << OffImm; @@ -671,6 +893,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "]"; } +void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) + O << ", #" << MO2.getImm() * 4; + O << "]"; +} + void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -678,9 +912,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, int32_t OffImm = (int32_t)MO1.getImm(); // Don't print +0. if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; + O << ", #-" << -OffImm; + else + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, @@ -689,10 +923,13 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm() / 4; // Don't print +0. 
- if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; + if (OffImm != 0) { + O << ", "; + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; + } } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, @@ -715,39 +952,10 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, O << "]"; } -void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) { - O << (float)MO.getFPImm(); - } else { - union { - uint32_t I; - float F; - } FPUnion; - - FPUnion.I = MO.getImm(); - O << FPUnion.F; - } -} - -void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) { - O << MO.getFPImm(); - } else { - // We expect the binary encoding of a floating point number here. - union { - uint64_t I; - double D; - } FPUnion; - - FPUnion.I = MO.getImm(); - O << FPUnion.D; - } + O << '#' << ARM_AM::getFPImmFloat(MO.getImm()); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, @@ -757,3 +965,28 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); O << "#0x" << utohexstr(Val); } + +void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + O << "#" << Imm + 1; +} + +void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + if (Imm == 0) + return; + O << ", ror #"; + switch (Imm) { + default: assert (0 && "illegal ror immediate!"); + case 1: O << "8"; break; + case 2: O << "16"; break; + case 3: O << "24"; break; + } +} + +void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "[" << MI->getOperand(OpNum).getImm() << "]"; +} diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index d5f238b..5c2173f 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -15,6 +15,7 @@ #define ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { @@ -22,10 +23,9 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; @@ -38,8 +38,11 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrModeTBH(const MCInst *MI, unsigned OpNum, 
raw_ostream &O); void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, @@ -48,11 +51,15 @@ public: raw_ostream &O); void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O); + void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -65,8 +72,11 @@ public: raw_ostream &O); void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -88,6 +98,8 @@ public: raw_ostream &O); void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, @@ -108,11 +120,15 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt deleted file mode 100644 index 18645c0..0000000 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ 
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARMAsmPrinter - ARMInstPrinter.cpp - ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile deleted file mode 100644 index 65d372e..0000000 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 595708f..9982fa6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -14,7 +14,8 @@ #ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H -#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -32,7 +33,8 @@ namespace ARM_AM { }; enum AddrOpc { - add = '+', sub = '-' + sub = 0, + add }; static inline const char *getAddrOpcStr(AddrOpc Op) { @@ -60,20 +62,6 @@ namespace ARM_AM { } } - static inline ShiftOpc getShiftOpcForNode(SDValue N) { - switch (N.getOpcode()) { - default: return ARM_AM::no_shift; - case ISD::SHL: return ARM_AM::lsl; - case ISD::SRL: return ARM_AM::lsr; - case ISD::SRA: return ARM_AM::asr; - case ISD::ROTR: return ARM_AM::ror; - //case ISD::ROTL: // Only if imm -> turn into ROTR. - // Can't handle RRX here, because it would require folding a flag into - // the addressing mode. :( This causes us to miss certain things. - //case ARMISD::RRX: return ARM_AM::rrx; - } - } - enum AMSubMode { bad_am_submode = 0, ia, @@ -588,6 +576,90 @@ namespace ARM_AM { AMSubMode getLoadStoreMultipleSubMode(int Opcode); + //===--------------------------------------------------------------------===// + // Floating-point Immediates + // + static inline float getFPImmFloat(unsigned Imm) { + // We expect an 8-bit binary encoding of a floating-point number here. + union { + uint32_t I; + float F; + } FPUnion; + + uint8_t Sign = (Imm >> 7) & 0x1; + uint8_t Exp = (Imm >> 4) & 0x7; + uint8_t Mantissa = Imm & 0xf; + + // 8-bit FP iEEEE Float Encoding + // abcd efgh aBbbbbbc defgh000 00000000 00000000 + // + // where B = NOT(b); + + FPUnion.I = 0; + FPUnion.I |= Sign << 31; + FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30; + FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25; + FPUnion.I |= (Exp & 0x3) << 23; + FPUnion.I |= Mantissa << 19; + return FPUnion.F; + } + + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. 
+ static inline int getFP32Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 + int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x7ffff) + return -1; + Mantissa >>= 19; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP32Imm(const APFloat &FPImm) { + return getFP32Imm(FPImm.bitcastToAPInt()); + } + + /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. + static inline int getFP64Imm(const APInt &Imm) { + uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; + int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 + uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL; + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0xffffffffffffULL) + return -1; + Mantissa >>= 48; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP64Imm(const APFloat &FPImm) { + return getFP64Imm(FPImm.bitcastToAPInt()); + } + } // end namespace ARM_AM } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 5e438a9..c31c5e6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -19,12 +20,12 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; namespace { @@ -35,13 +36,24 @@ public: /*HasRelocationAddend*/ false) {} }; -class ARMAsmBackend : public TargetAsmBackend { +class ARMAsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; bool isThumbMode; // Currently emitting Thumb code. 
public: - ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {} + ARMAsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), + isThumbMode(TT.startswith("thumb")) {} + + ~ARMAsmBackend() { + delete STI; + } unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; } + bool hasNOP() const { + return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -65,9 +77,9 @@ public: { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. { "fixup_arm_movt_hi16", 0, 20, 0 }, @@ -81,7 +93,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -123,20 +135,28 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { } bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 + const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP + const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0 + const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP if (isThumb()) { - // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to - // use 0x46c0 (which is a 'mov r8, r8' insn). + const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding + : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write16(0xbf00); + OW->Write16(nopEncoding); if (Count & 1) OW->Write8(0); return true; } // ARM mode + const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding + : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0xe1a00000); + OW->Write32(nopEncoding); + // FIXME: should this function return false when unable to write exactly + // 'Count' bytes with NOP encodings? 
switch (Count % 4) { default: break; // No leftover bytes to write case 1: OW->Write8(0); break; @@ -163,8 +183,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_movw_lo16_pcrel: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; - assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) && - "Out of range pc-relative fixup value!"); // inst{19-16} = Hi4; // inst{11-0} = Lo12; Value = (Hi4 << 16) | (Lo12); @@ -185,10 +203,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // inst{26} = i; // inst{14-12} = Mid3; // inst{7-0} = Lo8; - // The value comes in as the whole thing, not just the portion required - // for this fixup, so we need to mask off the bits not handled by this - // portion (lo vs. hi). - Value &= 0xffff; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); uint64_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -382,8 +396,9 @@ namespace { class ELFARMAsmBackend : public ARMAsmBackend { public: Triple::OSType OSType; - ELFARMAsmBackend(const Target &T, Triple::OSType _OSType) - : ARMAsmBackend(T), OSType(_OSType) { } + ELFARMAsmBackend(const Target &T, const StringRef TT, + Triple::OSType _OSType) + : ARMAsmBackend(T, TT), OSType(_OSType) { } void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; @@ -414,8 +429,9 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, class DarwinARMAsmBackend : public ARMAsmBackend { public: const object::mach::CPUSubtypeARM Subtype; - DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st) - : ARMAsmBackend(T), Subtype(st) { } + DarwinARMAsmBackend(const Target &T, const StringRef TT, + object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T, TT), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, @@ -492,25 +508,24 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, } // end anonymous namespace -TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { if (TheTriple.getArchName() == "armv4t" || TheTriple.getArchName() == "thumbv4t") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T); else if (TheTriple.getArchName() == "armv5e" || TheTriple.getArchName() == "thumbv5e") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ); else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); - return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on ARM"); - return new ELFARMAsmBackend(T, Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, Triple(TT).getOS()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 458f7dd..ec4b6ff 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -17,12 +17,9 @@ #ifndef ARMBASEINFO_H #define 
ARMBASEINFO_H -#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" -// Note that the following auto-generated files only defined enum types, and -// so are safe to include here. - namespace llvm { // Enums corresponding to ARM condition codes @@ -191,6 +188,22 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { } } +/// isARMLowRegister - Returns true if the register is a low register (r0-r7). +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// namespace ARMII { /// ARM Index Modes @@ -287,6 +300,148 @@ namespace ARMII { /// call operand. MO_PLT }; + + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This four-bit field describes the addressing mode used. + AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h + + // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load + // and store ops only. Generic "updating" flag is used for ld/st multiple. + // The index mode enums are declared in ARMBaseInfo.h + IndexModeShift = 5, + IndexModeMask = 3 << IndexModeShift, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 7, + FormMask = 0x3f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + LdStExFrm = 11 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, + + // Extend instructions + ExtFrm = 14 << FormShift, + + // VFP formats + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, + + // Thumb format + ThumbFrm = 25 << FormShift, + + // Miscelleaneous format + MiscFrm = 26 << FormShift, + + // NEON formats + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. 
+ UnaryDP = 1 << 13, + + // Xform16Bit - Indicates this Thumb2 instruction may be transformed into + // a 16-bit Thumb instruction if certain conditions are met. + Xform16Bit = 1 << 14, + + // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb + // instruction. Used by the parser to determine whether to require the 'S' + // suffix on the mnemonic (when not in an IT block) or preclude it (when + // in an IT block). + ThumbArithFlagSetting = 1 << 18, + + //===------------------------------------------------------------------===// + // Code domain. + DomainShift = 15, + DomainMask = 7 << DomainShift, + DomainGeneral = 0 << DomainShift, + DomainVFP = 1 << DomainShift, + DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + // + // FIXME: This list will need adjusting/fixing as the MC code emitter + // takes shape and the ARMCodeEmitter.cpp bits go away. + ShiftTypeShift = 4, + + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; + } // end namespace ARMII } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 350c92d..350c92d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 53b4c95..1c109e0 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -52,6 +52,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; CommentString = "@"; + Code16Directive = ".code\t16"; + Code32Directive = ".code\t32"; + SupportsDebugInformation = true; // Exceptions handling @@ -64,12 +67,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Data64bitsDirective = 0; CommentString = "@"; - - HasLEB128 = true; PrivateGlobalPrefix = ".L"; + Code16Directive = ".code\t16"; + Code32Directive = ".code\t32"; + WeakRefDirective = "\t.weak\t"; - HasLCOMMDirective = true; + LCOMMDirectiveType = LCOMM::NoAlignment; + HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 39be3f0..865c3e2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -12,17 +12,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMFixupKinds.h" -#include "ARMInstrInfo.h" -#include "ARMMCExpr.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include 
"llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" @@ -112,11 +113,13 @@ public: /// immediate Thumb2 direct branch target. uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; - + /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate /// branch target. uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. @@ -142,6 +145,16 @@ public: uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2' + /// operand. + uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2' + /// operand. + uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm' /// operand as needed by load/store instructions. @@ -183,6 +196,10 @@ public: uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + /// getPostIdxRegOpValue - Return encoding for postidx_reg operands. + uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands. uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; @@ -251,27 +268,13 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; /// getSORegOpValue - Return an encoded so_reg shifted register value. 
- unsigned getSORegOpValue(const MCInst &MI, unsigned Op, + unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getRotImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - switch (MI.getOperand(Op).getImm()) { - default: assert (0 && "Not a valid rot_imm value!"); - case 0: return 0; - case 8: return 1; - case 16: return 2; - case 24: return 3; - } - } - - unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - return MI.getOperand(Op).getImm() - 1; - } - unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { return 64 - MI.getOperand(Op).getImm(); @@ -280,12 +283,6 @@ public: unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getMsbOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; - - unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, @@ -306,6 +303,9 @@ public: unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, @@ -439,8 +439,10 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, bool isAdd = true; // Special value for #-0 - if (SImm == INT32_MIN) + if (SImm == INT32_MIN) { SImm = 0; + isAdd = false; + } // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (SImm < 0) { @@ -470,11 +472,34 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return 0; } +// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are +// determined by negating them and XOR'ing them with bit 23. +static int32_t encodeThumbBLOffset(int32_t offset) { + offset >>= 1; + uint32_t S = (offset & 0x800000) >> 23; + uint32_t J1 = (offset & 0x400000) >> 22; + uint32_t J2 = (offset & 0x200000) >> 21; + J1 = (~J1 & 0x1); + J2 = (~J2 & 0x1); + J1 ^= S; + J2 ^= S; + + offset &= ~0x600000; + offset |= J1 << 22; + offset |= J2 << 21; + + return offset; +} + /// getThumbBLTargetOpValue - Return encoding info for immediate branch target. 
uint32_t ARMMCCodeEmitter:: getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, + Fixups); + return encodeThumbBLOffset(MO.getImm()); } /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate @@ -482,28 +507,43 @@ getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, + Fixups); + return encodeThumbBLOffset(MO.getImm()); } /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, + Fixups); + return (MO.getImm() >> 1); } /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, + Fixups); + return (MO.getImm() >> 1); } /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target. 
uint32_t ARMMCCodeEmitter:: getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups); + return (MO.getImm() >> 1); } /// Return true if this branch has a non-always predication @@ -513,9 +553,9 @@ static bool HasConditionalBranch(const MCInst &MI) { for (int i = 0; i < NumOp-1; ++i) { const MCOperand &MCOp1 = MI.getOperand(i); const MCOperand &MCOp2 = MI.getOperand(i + 1); - if (MCOp1.isImm() && MCOp2.isReg() && + if (MCOp1.isImm() && MCOp2.isReg() && (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) { - if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL) + if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL) return true; } } @@ -541,15 +581,32 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - if (HasConditionalBranch(MI)) + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbranch, Fixups); return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); -} + ARM::fixup_arm_uncondbranch, Fixups); + } + return MO.getImm() >> 2; +} +uint32_t ARMMCCodeEmitter:: +getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbranch, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_uncondbranch, Fixups); + } + return MO.getImm() >> 1; +} /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit /// immediate branch target. 
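For reference, the J1/J2 handling introduced by the new encodeThumbBLOffset helper above (bits 22 and 21 of the halved offset are inverted and then XOR'ed with the sign bit S in bit 23) is easier to follow in isolation. A minimal editorial sketch of the same bit manipulation, not part of the imported patch; the name scrambleThumbBLOffset is illustrative only:

  #include <cstdint>

  // Mirrors the transform done by encodeThumbBLOffset: after halving the
  // offset, take S from bit 23, invert bits 22 (J1) and 21 (J2), XOR each
  // with S, and store them back into bits 22:21.
  static int32_t scrambleThumbBLOffset(int32_t offset) {
    offset >>= 1;                              // branch targets are halfword aligned
    uint32_t S  = (offset >> 23) & 0x1;        // sign bit of the 24-bit field
    uint32_t J1 = (~(offset >> 22) & 0x1) ^ S;
    uint32_t J2 = (~(offset >> 21) & 0x1) ^ S;
    offset &= ~0x600000;                       // clear the old bits 22:21
    offset |= (J1 << 22) | (J2 << 21);         // insert the scrambled values
    return offset;
  }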
@@ -579,9 +636,18 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, + Fixups); + int32_t offset = MO.getImm(); + uint32_t Val = 0x2000; + if (offset < 0) { + Val = 0x1000; + offset *= -1; + } + Val |= offset; + return Val; } /// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label @@ -589,9 +655,16 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, + Fixups); + int32_t Val = MO.getImm(); + if (Val < 0) { + Val *= -1; + Val |= 0x1000; + } + return Val; } /// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label @@ -599,9 +672,11 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10, + Fixups); + return MO.getImm(); } /// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg' @@ -635,17 +710,26 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. - assert(MO.isExpr() && "Unexpected machine operand type!"); - const MCExpr *Expr = MO.getExpr(); - - MCFixupKind Kind; - if (isThumb2()) - Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); - else - Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); - ++MCNumCPRelocations; + MCFixupKind Kind; + if (isThumb2()) + Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); + else + Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + ++MCNumCPRelocations; + } else { + Reg = ARM::PC; + int32_t Offset = MO.getImm(); + if (Offset < 0) { + Offset *= -1; + isAdd = false; + } + Imm12 = Offset; + } } else isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups); @@ -657,6 +741,37 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getT2Imm8s4OpValue - Return encoding info for +/// '+/- imm8<<2' operand. +uint32_t ARMMCCodeEmitter:: +getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // FIXME: The immediate operand should have already been encoded like this + // before ever getting here. 
The encoder method should just need to combine + // the MI operands for the register and the offset into a single + // representation for the complex operand in the .td file. This isn't just + // style, unfortunately. As-is, we can't represent the distinct encoding + // for #-0. + + // {8} = (U)nsigned (add == '1', sub == '0') + // {7-0} = imm8 + int32_t Imm8 = MI.getOperand(OpIdx).getImm(); + bool isAdd = Imm8 >= 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (Imm8 < 0) + Imm8 = -Imm8; + + // Scaled by 4. + Imm8 /= 4; + + uint32_t Binary = Imm8 & 0xff; + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (isAdd) + Binary |= (1 << 8); + return Binary; +} + /// getT2AddrModeImm8s4OpValue - Return encoding info for /// 'reg +/- imm8<<2' operand. uint32_t ARMMCCodeEmitter:: @@ -683,6 +798,12 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, } else isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups); + // FIXME: The immediate operand should have already been encoded like this + // before ever getting here. The encoder method should just need to combine + // the MI operands for the register and the offset into a single + // representation for the complex operand in the .td file. This isn't just + // style, unfortunately. As-is, we can't represent the distinct encoding + // for #-0. uint32_t Binary = (Imm8 >> 2) & 0xff; // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (isAdd) @@ -691,6 +812,20 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for +/// 'reg + imm8<<2' operand. +uint32_t ARMMCCodeEmitter:: +getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // {11-8} = reg + // {7-0} = imm8 + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Imm8 = MO1.getImm(); + return (Reg << 8) | Imm8; +} + // FIXME: This routine assumes that a binary // expression will always result in a PCRel expression // In reality, its only true if one or more subexpressions @@ -818,6 +953,17 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: +getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // {4} isAdd + // {3-0} Rm + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx+1); + bool isAdd = MO1.getImm() != 0; + return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); +} + +uint32_t ARMMCCodeEmitter:: getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { // {9} 1 == imm8, 0 == Rm @@ -891,7 +1037,10 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups); + return (MO.getImm() >> 2); } /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. 
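getT2Imm8s4OpValue above encodes a signed byte offset in sign-magnitude form: bit {8} is the U (add) bit and bits {7-0} carry the magnitude divided by 4, which, as the FIXME notes, cannot express the distinct #-0 encoding. A standalone sketch of that arithmetic, with an illustrative function name only:

    // Hypothetical model of the '+/- imm8<<2' encoding used above.
    #include <cstdint>
    #include <cstdlib>
    static uint32_t encodeImm8s4(int32_t ByteOffset) {
      uint32_t U = (ByteOffset >= 0) ? (1u << 8) : 0;                 // {8}: add vs. sub
      uint32_t Mag = static_cast<uint32_t>(std::abs(ByteOffset)) / 4; // scaled by 4
      return U | (Mag & 0xff);                                        // {7-0}: imm8
    }

With this model an offset of +20 encodes as 0x105 and -20 as 0x005.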
@@ -934,20 +1083,17 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, } unsigned ARMMCCodeEmitter:: -getSORegOpValue(const MCInst &MI, unsigned OpIdx, +getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be - // shifted. The second is either Rs, the amount to shift by, or reg0 in which - // case the imm contains the amount to shift by. + // shifted. The second is Rs, the amount to shift by, and the third specifies + // the type of the shift. // // {3-0} = Rm. - // {4} = 1 if reg shift, 0 if imm shift + // {4} = 1 // {6-5} = type - // If reg shift: - // {11-8} = Rs - // {7} = 0 - // else (imm shift) - // {11-7} = imm + // {11-8} = Rs + // {7} = 0 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -966,45 +1112,70 @@ getSORegOpValue(const MCInst &MI, unsigned OpIdx, // LSR - 0011 // ASR - 0101 // ROR - 0111 - // RRX - 0110 and bit[11:8] clear. switch (SOpc) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::lsl: SBits = 0x1; break; case ARM_AM::lsr: SBits = 0x3; break; case ARM_AM::asr: SBits = 0x5; break; case ARM_AM::ror: SBits = 0x7; break; - case ARM_AM::rrx: SBits = 0x6; break; - } - } else { - // Set shift operand (bit[6:4]). - // LSL - 000 - // LSR - 010 - // ASR - 100 - // ROR - 110 - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x0; break; - case ARM_AM::lsr: SBits = 0x2; break; - case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::ror: SBits = 0x6; break; } } Binary |= SBits << 4; - if (SOpc == ARM_AM::rrx) - return Binary; - // Encode the shift operation Rs or shift_imm (except rrx). - if (Rs) { - // Encode Rs bit[11:8]. - assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + // Encode the shift operation Rs. + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); +} + +unsigned ARMMCCodeEmitter:: +getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // Sub-operands are [reg, imm]. The first register is Rm, the reg to be + // shifted. The second is the amount to shift by. + // + // {3-0} = Rm. + // {4} = 0 + // {6-5} = type + // {11-7} = imm + + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); + + // Encode Rm. + unsigned Binary = getARMRegisterNumbering(MO.getReg()); + + // Encode the shift opcode. + unsigned SBits = 0; + + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + // RRX - 110 and bit[11:8] clear. + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + case ARM_AM::rrx: + Binary |= 0x60; + return Binary; } // Encode shift_imm bit[11:7]. 
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; + Binary |= SBits << 4; + unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm()); + assert(Offset && "Offset must be in range 1-32!"); + if (Offset == 32) Offset = 0; + return Binary | (Offset << 7); } + unsigned ARMMCCodeEmitter:: getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { @@ -1106,6 +1277,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, case ARM_AM::lsl: SBits = 0x0; break; case ARM_AM::lsr: SBits = 0x2; break; case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::rrx: // FALLTHROUGH case ARM_AM::ror: SBits = 0x6; break; } @@ -1131,24 +1303,6 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, } unsigned ARMMCCodeEmitter:: -getMsbOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - // MSB - 5 bits. - uint32_t lsb = MI.getOperand(Op-1).getImm(); - uint32_t width = MI.getOperand(Op).getImm(); - uint32_t msb = lsb+width-1; - assert (width != 0 && msb < 32 && "Illegal bit width!"); - return msb; -} - -unsigned ARMMCCodeEmitter:: -getSsatBitPosValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - // For ssat instructions, the bit position should be encoded decremented by 1 - return MI.getOperand(Op).getImm()-1; -} - -unsigned ARMMCCodeEmitter:: getRegisterListOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { // VLDM/VSTM: @@ -1158,8 +1312,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = ARM::SPRRegClass.contains(Reg); - bool DPRRegs = ARM::DPRRegClass.contains(Reg); + bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; @@ -1299,7 +1453,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, Size = Desc.getSize(); else llvm_unreachable("Unexpected instruction size!"); - + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); // Thumb 32-bit wide instructions need to emit the high order halfword // first. 
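getSORegImmOpValue above packs an immediate-shifted register operand as {3-0} = Rm, bit {4} = 0, {6-5} = shift type, and {11-7} = shift amount, encoding a shift of 32 as 0 and handling rrx separately as the value 0x60 with no amount field. A standalone sketch of the common path, using illustrative names that are not part of the patch:

    // Hypothetical model of the immediate-shift operand encoding above.
    // The type bits match the patch: LSL=00, LSR=01, ASR=10, ROR=11 in {6-5}.
    #include <cstdint>
    enum ShiftKind { LSL = 0x0, LSR = 0x2, ASR = 0x4, ROR = 0x6 };
    static uint32_t encodeSORegImm(unsigned Rm, ShiftKind Kind, unsigned Amount) {
      uint32_t Binary = Rm & 0xf;                    // {3-0} = Rm, {4} = 0
      Binary |= static_cast<uint32_t>(Kind) << 4;    // {6-5} = shift type
      if (Amount == 32)                              // a 32-bit shift encodes as 0
        Amount = 0;
      return Binary | ((Amount & 0x1f) << 7);        // {11-7} = shift amount
    }

For example, encodeSORegImm(3, LSR, 8) yields 0x423, i.e. r3 logically shifted right by 8.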
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2727ba8..2727ba8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 0a2e883..0a2e883 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f8fcf2b..a55c410 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -13,10 +13,16 @@ #include "ARMMCTargetDesc.h" #include "ARMMCAsmInfo.h" +#include "ARMBaseInfo.h" +#include "InstPrinter/ARMInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" @@ -35,7 +41,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { unsigned Len = TT.size(); unsigned Idx = 0; - // FIXME: Enahnce Triple helper class to extract ARM version. + // FIXME: Enhance Triple helper class to extract ARM version. bool isThumb = false; if (Len >= 5 && TT.substr(0, 4) == "armv") Idx = 4; @@ -50,18 +56,21 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv - ARMArchFeature = "+v7,+noarm,+db,+hwdiv"; + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk"; + // FeatureT2XtPk, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; } else - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2 - ARMArchFeature = "+v7,+neon,+db,+t2dsp"; + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; } else if (SubVer == '6') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ARMArchFeature = "+v6t2"; + else if (Len >= Idx+2 && TT[Idx+1] == 'm') + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6t2,+noarm,+mclass"; else ARMArchFeature = "+v6"; } else if (SubVer == '5') { @@ -80,6 +89,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { ARMArchFeature += ",+thumb-mode"; } + Triple TheTriple(TT); + if (TheTriple.getOS() == Triple::NativeClient) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+nacl-mode"; + else + ARMArchFeature += ",+nacl-mode"; + } + return ARMArchFeature; } @@ -98,36 +115,18 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -// Force static initialization. 
-extern "C" void LLVMInitializeARMMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, - ARM_MC::createARMMCSubtargetInfo); -} - static MCInstrInfo *createARMMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitARMMCInstrInfo(X); return X; } -extern "C" void LLVMInitializeARMMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); -} - -static MCRegisterInfo *createARMMCRegisterInfo() { +static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitARMMCRegisterInfo(X); + InitARMMCRegisterInfo(X, ARM::LR); return X; } -extern "C" void LLVMInitializeARMMCRegInfo() { - TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); -} - static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); @@ -137,8 +136,128 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { return new ARMELFMCAsmInfo(); } -extern "C" void LLVMInitializeARMMCAsmInfo() { - // Register the target asm info. +static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) { + Triple TheTriple(TT); + // Default relocation model on Darwin is PIC, not DynamicNoPIC. + RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; + } + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) + return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + + if (TheTriple.isOSWindows()) { + llvm_unreachable("ARM does not support Windows COFF format"); + return NULL; + } + + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createARMMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new ARMInstPrinter(MAI, STI); + return 0; +} + +namespace { + +class ARMMCInstrAnalysis : public MCInstrAnalysis { +public: + ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + // BCCs with the "always" predicate are unconditional branches. + if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + // BCCs with the "always" predicate are unconditional branches. + if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + // We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(0).getImm(); + // FIXME: This is not right for thumb. 
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + } +}; + +} + +static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { + return new ARMMCInstrAnalysis(Info); +} + +// Force static initialization. +extern "C" void LLVMInitializeARMTargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo); RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, + ARM_MC::createARMMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, + ARM_MC::createARMMCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget, + createARMMCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget, + createARMMCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 74701e3..9b3d3bd 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -14,12 +14,19 @@ #ifndef ARMMCTARGETDESC_H #define ARMMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" #include <string> namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; -class Target; class StringRef; +class Target; +class raw_ostream; extern Target TheARMTarget, TheThumbTarget; @@ -33,6 +40,18 @@ namespace ARM_MC { StringRef FS); } +MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); + +/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. +MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + } // End llvm namespace // Defines symbolic names for ARM registers. 
This defines a mapping from diff --git a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index a36e47d..352c73e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMFixupKinds.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" @@ -19,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; using namespace llvm::object; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 68daf42..0000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMARMDesc - ARMMCTargetDesc.cpp - ARMMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile deleted file mode 100644 index 448ed9d..0000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp deleted file mode 100644 index c85d1e9..0000000 --- a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp +++ /dev/null @@ -1,149 +0,0 @@ -//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "neon-mov-fix" -#include "ARM.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumVMovs, "Number of reg-reg moves converted"); - -namespace { - struct NEONMoveFixPass : public MachineFunctionPass { - static char ID; - NEONMoveFixPass() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "NEON reg-reg move conversion"; - } - - private: - const TargetRegisterInfo *TRI; - const ARMBaseInstrInfo *TII; - bool isA8; - - typedef DenseMap<unsigned, const MachineInstr*> RegMap; - - bool InsertMoves(MachineBasicBlock &MBB); - }; - char NEONMoveFixPass::ID = 0; -} - -static bool inNEONDomain(unsigned Domain, bool isA8) { - return (Domain & ARMII::DomainNEON) || - (isA8 && (Domain & ARMII::DomainNEONA8)); -} - -bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { - RegMap Defs; - bool Modified = false; - - // Walk over MBB tracking the def points of the registers. - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; - for (; MII != E; MII = NextMII) { - NextMII = llvm::next(MII); - MachineInstr *MI = &*MII; - - if (MI->getOpcode() == ARM::VMOVD && - !TII->isPredicated(MI)) { - unsigned SrcReg = MI->getOperand(1).getReg(); - // If we do not find an instruction defining the reg, this means the - // register should be live-in for this BB. It's always to better to use - // NEON reg-reg moves. - unsigned Domain = ARMII::DomainNEON; - RegMap::iterator DefMI = Defs.find(SrcReg); - if (DefMI != Defs.end()) { - Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask; - // Instructions in general domain are subreg accesses. - // Map them to NEON reg-reg moves. - if (Domain == ARMII::DomainGeneral) - Domain = ARMII::DomainNEON; - } - - if (inNEONDomain(Domain, isA8)) { - // Convert VMOVD to VORRd - unsigned DestReg = MI->getOperand(0).getReg(); - - DEBUG({errs() << "vmov convert: "; MI->dump();}); - - // It's safe to ignore imp-defs / imp-uses here, since: - // - We're running late, no intelligent condegen passes should be run - // afterwards - // - The imp-defs / imp-uses are superregs only, we don't care about - // them. - AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VORRd), DestReg) - .addReg(SrcReg).addReg(SrcReg)); - MBB.erase(MI); - MachineBasicBlock::iterator I = prior(NextMII); - MI = &*I; - - DEBUG({errs() << " into: "; MI->dump();}); - - Modified = true; - ++NumVMovs; - } else { - assert((Domain & ARMII::DomainVFP) && "Invalid domain!"); - // Do nothing. - } - } - - // Update def information. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned MOReg = MO.getReg(); - - Defs[MOReg] = MI; - // Catch aliases as well. 
- for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R) - Defs[*R] = MI; - } - } - - return Modified; -} - -bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { - ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); - const TargetMachine &TM = Fn.getTarget(); - - if (AFI->isThumb1OnlyFunction()) - return false; - - TRI = TM.getRegisterInfo(); - TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); - isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8(); - - bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; - Modified |= InsertMoves(MBB); - } - - return Modified; -} - -/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix -/// pass. -FunctionPass *llvm::createNEONMoveFixPass() { - return new NEONMoveFixPass(); -} diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 163a0a9..500e3de 100644 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -9,7 +9,7 @@ #include "ARM.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheARMTarget, llvm::TheThumbTarget; diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index c258870..d848177 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -21,7 +21,7 @@ using namespace llvm; -bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { +bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -133,9 +133,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it point to the stack slot that contains the previous FP. if (hasFP(MF)) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) - .setMIFlags(MachineInstr::FrameSetup); + .setMIFlags(MachineInstr::FrameSetup)); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. @@ -155,6 +155,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + // Thumb1 does not currently support dynamic stack realignment. Report a + // fatal error rather then silently generate bad code. + if (RegInfo->needsStackRealignment(MF)) + report_fatal_error("Dynamic stack realignment not supported for thumb1."); + // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. 
Any variable size objects // will be allocated after this, so we can still use the base pointer diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp index 4eb0b6c..e8ed482 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "Thumb1RegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -182,7 +181,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, int Opc = 0; int ExtraOpc = 0; bool NeedCC = false; - bool NeedPred = false; if (DestReg == BaseReg && BaseReg == ARM::SP) { assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); @@ -217,7 +215,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } else { Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; NumBits = 8; - NeedPred = NeedCC = true; + NeedCC = true; } isTwoAddr = true; } @@ -241,7 +239,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) + .setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) @@ -262,18 +261,15 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(DestReg).addImm(ThisVal); - if (NeedPred) - MIB = AddDefaultPred(MIB); + MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); - } - else { + } else { bool isKill = BaseReg != ARM::SP; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - if (NeedPred) - MIB = AddDefaultPred(MIB); + MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); BaseReg = DestReg; @@ -285,7 +281,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Scale = 1; Chunk = ((1 << NumBits) - 1) * Scale; Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NeedPred = NeedCC = isTwoAddr = true; + NeedCC = isTwoAddr = true; } } } @@ -405,7 +401,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, unsigned Scale = 1; if (FrameReg != ARM::SP) { Opcode = ARM::tADDi3; - MI.setDesc(TII.get(Opcode)); NumBits = 3; } else { NumBits = 8; @@ -419,10 +414,9 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // Turn it into a move. MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset and add predicate operands. 
+ // Remove offset MI.RemoveOperand(FrameRegIdx+1); MachineInstrBuilder MIB(&MI); - AddDefaultPred(MIB); return true; } @@ -431,6 +425,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp if (Opcode == ARM::tADDi3) { + MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) @@ -459,6 +454,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) if (Opcode == ARM::tADDi3) { + MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); @@ -479,10 +475,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MI.setDesc(TII.get(ARM::tADDhirr)); MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - if (Opcode == ARM::tADDi3) { - MachineInstrBuilder MIB(&MI); - AddDefaultPred(MIB); - } } return true; } else { @@ -545,9 +537,9 @@ Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - bool Done = false; - Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); + bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); assert (Done && "Unable to resolve frame index!"); + (void)Done; } /// saveScavengerRegister - Spill the register so it can be used by the diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index 360ec00..b627400 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -124,6 +124,27 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, if (Uses.count(DstReg) || Defs.count(SrcReg)) return false; + // If the CPSR is defined by this copy, then we don't want to move it. E.g., + // if we have: + // + // movs r1, r1 + // rsb r1, 0 + // movs r2, r2 + // rsb r2, 0 + // + // we don't want this to be converted to: + // + // movs r1, r1 + // movs r2, r2 + // itt mi + // rsb r1, 0 + // rsb r2, 0 + // + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef() && + MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) + return false; + // Then peek at the next instruction to see if it's predicated on CC or OCC. // If not, then there is nothing to be gained by moving the copy. 
MachineBasicBlock::iterator I = MI; ++I; diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 51b56aa..cf040c82 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -14,9 +14,9 @@ #include "Thumb2InstrInfo.h" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -122,7 +122,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || + RC == ARM::GPRnopcRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -149,7 +150,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || + RC == ARM::GPRnopcRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -233,9 +235,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - // FIXME: Fix Thumb1 immediate encoding. 
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags)); NumBytes = 0; continue; } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index c741a6e..89a155c 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -9,11 +9,11 @@ #define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -97,11 +97,11 @@ namespace { { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. @@ -507,6 +507,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. + AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); @@ -546,6 +547,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, } case ARM::t2RSBri: case ARM::t2RSBSri: + case ARM::t2SXTB: + case ARM::t2SXTH: + case ARM::t2UXTB: + case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; @@ -742,7 +747,11 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if ((MCID.getOpcode() == ARM::t2RSBSri || - MCID.getOpcode() == ARM::t2RSBri) && i == 2) + MCID.getOpcode() == ARM::t2RSBri || + MCID.getOpcode() == ARM::t2SXTB || + MCID.getOpcode() == ARM::t2SXTH || + MCID.getOpcode() == ARM::t2UXTB || + MCID.getOpcode() == ARM::t2UXTH) && i == 2) // Skip the zero immediate operand, it's now implicit. 
continue; bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp index 46ae286..5dce06a 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp @@ -26,8 +26,8 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 7b91fea..f877c65 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -80,7 +80,7 @@ namespace { // Otherwise we don't know that the it's okay to zapnot this entire // byte. Only do this iff we can prove that the missing bits are // already null, so the bytezap doesn't need to really null them. - BitsToCheck |= ~Constant & (0xFF << 8*i); + BitsToCheck |= ~Constant & (0xFFULL << 8*i); } } } @@ -114,9 +114,8 @@ namespace { if (!x) return 0; unsigned at = CountLeadingZeros_64(x); uint64_t complow = 1ULL << (63 - at); - uint64_t comphigh = 1ULL << (64 - at); - //cerr << x << ":" << complow << ":" << comphigh << "\n"; - if (abs64(complow - x) <= abs64(comphigh - x)) + uint64_t comphigh = complow << 1; + if (x - complow <= comphigh - x) return complow; else return comphigh; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp index de003fb..3057eb8 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp @@ -49,6 +49,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) // Set up the TargetLowering object. //I am having problems with shr n i8 1 setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); @@ -153,6 +154,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setStackPointerRegisterToSaveRestore(Alpha::R30); setJumpBufSize(272); @@ -160,10 +164,12 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setMinFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); } -MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const { +EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i64; } diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h index 13383f4..80f8efa 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h @@ -66,7 +66,7 @@ namespace llvm { virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } /// getSetCCResultType - Get the SETCC result ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp index 4dcec8f..8df2ed7 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -16,7 +16,6 @@ #include "AlphaMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td index b201712..c8c9377 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td +++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td @@ -607,6 +607,8 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), (MB)>; +def : Pat<(atomic_fence (imm), (imm)), (MB)>; + //Basic Floating point ops //Floats diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp index df8f157..8b6230f 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -40,8 +39,7 @@ using namespace llvm; AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(), - TII(tii) { + : AlphaGenRegisterInfo(Alpha::R26), TII(tii) { } static long getUpper16(long l) { @@ -178,10 +176,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned AlphaRegisterInfo::getRARegister() const { - return Alpha::R26; -} - unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { 
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -198,16 +192,6 @@ unsigned AlphaRegisterInfo::getEHHandlerRegister() const { return 0; } -int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - -int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - std::string AlphaRegisterInfo::getPrettyName(unsigned reg) { std::string s(AlphaRegDesc[reg].Name); diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h index 1072bf7..e35be27 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h @@ -42,16 +42,12 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - static std::string getPrettyName(unsigned reg); }; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp index 624a5e2..bd55ce9 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp @@ -13,7 +13,6 @@ #include "AlphaSubtarget.h" #include "Alpha.h" -#include "llvm/Target/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp index 3b65d41..fc9a677 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -14,7 +14,7 @@ #include "AlphaTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeAlphaTarget() { @@ -22,19 +22,17 @@ extern "C" void LLVMInitializeAlphaTarget() { RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget); } -AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout("e-f128:128:128-n64"), FrameLowering(Subtarget), Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this) { - setRelocationModel(Reloc::PIC_); } - //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h index cf00e58..48bb948 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h @@ -36,8 +36,9 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaSelectionDAGInfo TSInfo; public: - AlphaTargetMachine(const Target &T, 
const std::string &TT, - const std::string &CPU, const std::string &FS); + AlphaTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp index 562052b..4ad021c 100644 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "AlphaMCTargetDesc.h" #include "AlphaMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "AlphaGenInstrInfo.inc" @@ -36,8 +37,10 @@ static MCInstrInfo *createAlphaMCInstrInfo() { return X; } -extern "C" void LLVMInitializeAlphaMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); +static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAlphaMCRegisterInfo(X, Alpha::R26); + return X; } static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,11 +50,29 @@ static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeAlphaMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, - createAlphaMCSubtargetInfo); +static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(Reloc::PIC_, CM); + return X; } -extern "C" void LLVMInitializeAlphaMCAsmInfo() { +// Force static initialization. +extern "C" void LLVMInitializeAlphaTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget, + createAlphaMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, + createAlphaMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index ad0dd26..0000000 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMAlphaDesc - AlphaMCTargetDesc.cpp - AlphaMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile deleted file mode 100644 index d55175f..0000000 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Alpha/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp index f7099b9..bdc69e7 100644 --- a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Alpha.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; llvm::Target llvm::TheAlphaTarget; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp index 6ba258b..ed9844e 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp @@ -29,9 +29,9 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h index 726fa2c..169aa8e 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHA_FRAMEINFO_H -#define ALPHA_FRAMEINFO_H +#ifndef BLACKFIN_FRAMEINFO_H +#define BLACKFIN_FRAMEINFO_H #include "Blackfin.h" #include "BlackfinSubtarget.h" diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp index d572832..7d4c45f 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -42,6 +42,7 @@ using namespace llvm; BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setStackPointerRegisterToSaveRestore(BF::SP); setIntDivIsCheap(false); @@ -99,6 +100,7 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) // Blackfin has no intrinsics for these particular operations. setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -134,7 +136,7 @@ const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { +EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const { // SETCC always sets the CC register. Technically that is an i1 register, but // that type is not legal, so we treat it as an i32 register. 
return MVT::i32; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h index b65775b..90908ba 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h @@ -33,7 +33,7 @@ namespace llvm { public: BlackfinTargetLowering(TargetMachine &TM); virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; } - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp index d190ae7..c06a919 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -16,10 +16,10 @@ #include "Blackfin.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "BlackfinGenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index ae8ee9e..7135676 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -34,7 +34,7 @@ namespace bfinIntrinsic { } -std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, +std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys, unsigned numTys) const { static const char *const names[] = { #define GET_INTRINSIC_NAME_TABLE @@ -81,8 +81,8 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { #include "BlackfinGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES -static const FunctionType *getType(LLVMContext &Context, unsigned id) { - const Type *ResultTy = NULL; +static FunctionType *getType(LLVMContext &Context, unsigned id) { + Type *ResultTy = NULL; std::vector<Type*> ArgTys; bool IsVarArg = false; @@ -94,7 +94,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) { } Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - const Type **Tys, + Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded"); AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID); diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h index 7c4b5a9..f05db5a 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h @@ -19,11 +19,11 @@ namespace llvm { class BlackfinIntrinsicInfo : public TargetIntrinsicInfo { public: - std::string getName(unsigned IntrID, const Type **Tys = 0, + std::string getName(unsigned IntrID, Type **Tys = 0, unsigned numTys = 0) const; unsigned lookupName(const char *Name, unsigned Len) const; bool isOverloaded(unsigned IID) const; - Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0, unsigned numTys = 0) const; }; diff --git 
a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 3a7c104..0d415c5 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -37,7 +36,7 @@ using namespace llvm; BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii) - : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {} + : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {} const unsigned* BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -327,10 +326,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned BlackfinRegisterInfo::getRARegister() const { - return BF::RETS; -} - unsigned BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -347,14 +342,3 @@ unsigned BlackfinRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - -int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, - bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h index 86f45c1..6ac22af 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -53,15 +53,11 @@ namespace llvm { int SPAdj, RegScavenger *RS = NULL) const; unsigned getFrameRegister(const MachineFunction &MF) const; - unsigned getRARegister() const; // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - // Utility functions void adjustRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp index ec919cd..0bdce09 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp @@ -13,7 +13,7 @@ #include "BlackfinSubtarget.h" #include "Blackfin.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp index a1c9f1c..a4ae46b 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -13,7 +13,7 @@ #include "BlackfinTargetMachine.h" #include "Blackfin.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -22,10 +22,12 @@ extern "C" void LLVMInitializeBlackfinTarget() { } BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, + StringRef CPU, + StringRef FS, + Reloc::Model RM, + CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, CPU, FS), TLInfo(*this), diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h index bd7dc84..c85337fe2 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -35,8 +35,9 @@ namespace llvm { BlackfinFrameLowering FrameLowering; BlackfinIntrinsicInfo IntrinsicInfo; public: - BlackfinTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + BlackfinTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp index 0fa1471..272e3c2 100644 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "BlackfinMCTargetDesc.h" #include "BlackfinMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "BlackfinGenInstrInfo.inc" @@ -36,12 +37,12 @@ static MCInstrInfo *createBlackfinMCInstrInfo() { return X; } -extern "C" void LLVMInitializeBlackfinMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, - createBlackfinMCInstrInfo); +static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) { 
+ MCRegisterInfo *X = new MCRegisterInfo(); + InitBlackfinMCRegisterInfo(X, BF::RETS); + return X; } - static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { @@ -50,11 +51,31 @@ static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, return X; } -extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, - createBlackfinMCSubtargetInfo); +static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeBlackfinMCAsmInfo() { +// Force static initialization. +extern "C" void LLVMInitializeBlackfinTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget, + createBlackfinMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, + createBlackfinMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget, + createBlackfinMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, + createBlackfinMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 8cd924f..0000000 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMBlackfinDesc - BlackfinMCTargetDesc.cpp - BlackfinMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile deleted file mode 100644 index 6b26101..0000000 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
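The Blackfin hunks above illustrate the pattern this import applies to every backend: the separate LLVMInitialize<Target>MCInstrInfo/MCSubtargetInfo/MCAsmInfo entry points are folded into one LLVMInitialize<Target>TargetMC() routine that registers each MC-layer description through the relocated llvm/Support/TargetRegistry.h. A minimal sketch for a hypothetical target Foo follows; TheFooTarget, FooMCAsmInfo, Foo::RETS and the create*/Init* helper names are illustrative stand-ins, while the registry calls themselves are the ones used in the hunk (the instr-info and subtarget-info factories follow the same new-then-Init pattern and are omitted):

static MCRegisterInfo *createFooMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *X = new MCRegisterInfo();
  InitFooMCRegisterInfo(X, Foo::RETS);   // return-address register now supplied here,
  return X;                              // replacing the getRARegister() override
}

static MCCodeGenInfo *createFooMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                             CodeModel::Model CM) {
  MCCodeGenInfo *X = new MCCodeGenInfo();
  X->InitMCCodeGenInfo(RM, CM);          // relocation/code model recorded at the MC layer
  return X;
}

// Force static initialization.
extern "C" void LLVMInitializeFooTargetMC() {
  RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);
  TargetRegistry::RegisterMCCodeGenInfo(TheFooTarget, createFooMCCodeGenInfo);
  TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
  TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
  TargetRegistry::RegisterMCSubtargetInfo(TheFooTarget, createFooMCSubtargetInfo);
}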
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp index 402e0af..57f1d3e 100644 --- a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Blackfin.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp index 415beb1..69d8c46 100644 --- a/contrib/llvm/lib/Target/CBackend/CBackend.cpp +++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp @@ -37,10 +37,11 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" @@ -48,6 +49,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Host.h" #include "llvm/Config/config.h" #include <algorithm> @@ -62,12 +64,6 @@ extern "C" void LLVMInitializeCBackendTarget() { RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); } -extern "C" void LLVMInitializeCBackendMCAsmInfo() {} - -extern "C" void LLVMInitializeCBackendMCInstrInfo() {} - -extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {} - namespace { class CBEMCAsmInfo : public MCAsmInfo { public: @@ -86,6 +82,8 @@ namespace { LoopInfo *LI; const Module *TheModule; const MCAsmInfo* TAsm; + const MCRegisterInfo *MRI; + const MCObjectFileInfo *MOFI; MCContext *TCtx; const TargetData* TD; @@ -99,14 +97,14 @@ namespace { /// UnnamedStructIDs - This contains a unique ID for each struct that is /// either anonymous or has no name. 
- DenseMap<const StructType*, unsigned> UnnamedStructIDs; + DenseMap<StructType*, unsigned> UnnamedStructIDs; public: static char ID; explicit CWriter(formatted_raw_ostream &o) : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), - NextAnonValueNumber(0) { + TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0), + OpaqueCounter(0), NextAnonValueNumber(0) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); FPCounter = 0; } @@ -145,6 +143,8 @@ namespace { delete Mang; delete TCtx; delete TAsm; + delete MRI; + delete MOFI; FPConstantMap.clear(); ByValParams.clear(); intrinsicPrototypesAlreadyGenerated.clear(); @@ -152,20 +152,20 @@ namespace { return false; } - raw_ostream &printType(raw_ostream &Out, const Type *Ty, + raw_ostream &printType(raw_ostream &Out, Type *Ty, bool isSigned = false, const std::string &VariableName = "", bool IgnoreName = false, const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar = ""); void printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, - const PointerType *Ty); + PointerType *Ty); - std::string getStructName(const StructType *ST); + std::string getStructName(StructType *ST); /// writeOperandDeref - Print the result of dereferencing the specified /// operand with '*'. This is equivalent to printing '*' then using @@ -188,7 +188,7 @@ namespace { void writeOperandWithCast(Value* Operand, const ICmpInst &I); bool writeInstructionCast(const Instruction &I); - void writeMemoryAccess(Value *Operand, const Type *OperandType, + void writeMemoryAccess(Value *Operand, Type *OperandType, bool IsVolatile, unsigned Alignment); private : @@ -200,7 +200,7 @@ namespace { void printIntrinsicDefinition(const Function &F, raw_ostream &Out); void printModuleTypes(); - void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &); + void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &); void printFloatingPointConstants(Function &F); void printFloatingPointConstants(const Constant *C); void printFunctionSignature(const Function *F, bool Prototype); @@ -209,7 +209,7 @@ namespace { void printBasicBlock(BasicBlock *BB); void printLoop(Loop *L); - void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy); + void printCast(unsigned opcode, Type *SrcTy, Type *DstTy); void printConstant(Constant *CPV, bool Static); void printConstantWithCast(Constant *CPV, unsigned Opcode); bool printConstExprCast(const ConstantExpr *CE, bool Static); @@ -288,10 +288,12 @@ namespace { void visitInvokeInst(InvokeInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } - void visitUnwindInst(UnwindInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } + void visitResumeInst(ResumeInst &I) { + llvm_unreachable("DwarfEHPrepare pass didn't work!"); + } void visitUnreachableInst(UnreachableInst &I); void visitPHINode(PHINode &I); @@ -360,8 +362,8 @@ static std::string CBEMangle(const std::string &S) { return Result; } -std::string CWriter::getStructName(const StructType *ST) { - if (!ST->isAnonymous() && !ST->getName().empty()) +std::string CWriter::getStructName(StructType *ST) { + if (!ST->isLiteral() && !ST->getName().empty()) return CBEMangle("l_"+ST->getName().str()); return "l_unnamed_" + utostr(UnnamedStructIDs[ST]); @@ -373,20 +375,20 @@ std::string CWriter::getStructName(const StructType *ST) { /// print it 
as "Struct (*)(...)", for struct return functions. void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, - const PointerType *TheTy) { - const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); + PointerType *TheTy) { + FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); std::string tstr; raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (*) ("; bool PrintedType = false; FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - const Type *RetTy = cast<PointerType>(*I)->getElementType(); + Type *RetTy = cast<PointerType>(*I)->getElementType(); unsigned Idx = 1; for (++I, ++Idx; I != E; ++I, ++Idx) { if (PrintedType) FunctionInnards << ", "; - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -408,7 +410,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, } raw_ostream & -CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, +CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar) { assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); @@ -444,7 +446,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, " __attribute__((vector_size(64))) " + NameSoFar); case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); + VectorType *VTy = cast<VectorType>(Ty); return printSimpleType(Out, VTy->getElementType(), isSigned, " __attribute__((vector_size(" + utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); @@ -461,7 +463,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, +raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { @@ -471,14 +473,14 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, switch (Ty->getTypeID()) { case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); + FunctionType *FTy = cast<FunctionType>(Ty); std::string tstr; raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (" << NameSoFar << ") ("; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -502,7 +504,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, return Out; } case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); + StructType *STy = cast<StructType>(Ty); // Check to see if the type is named. 
if (!IgnoreName) @@ -523,7 +525,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, } case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); + PointerType *PTy = cast<PointerType>(Ty); std::string ptrName = "*" + NameSoFar; if (PTy->getElementType()->isArrayTy() || @@ -537,7 +539,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, } case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); unsigned NumElements = ATy->getNumElements(); if (NumElements == 0) NumElements = 1; // Arrays are wrapped in structs to allow them to have normal @@ -560,7 +562,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. // - const Type *ETy = CPA->getType()->getElementType(); + Type *ETy = CPA->getType()->getElementType(); bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) || ETy == Type::getInt8Ty(CPA->getContext())); @@ -682,7 +684,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) { /// Print out the casting for a cast operation. This does the double casting /// necessary for conversion to the destination type, if necessary. /// @brief Print a cast -void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { +void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { // Print the destination type cast switch (opc) { case Instruction::UIToFP: @@ -917,7 +919,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { } if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { - const Type* Ty = CI->getType(); + Type* Ty = CI->getType(); if (Ty == Type::getInt1Ty(CPV->getContext())) Out << (CI->getZExtValue() ? '1' : '0'); else if (Ty == Type::getInt32Ty(CPV->getContext())) @@ -1027,7 +1029,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { printConstantArray(CA, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - const ArrayType *AT = cast<ArrayType>(CPV->getType()); + ArrayType *AT = cast<ArrayType>(CPV->getType()); Out << '{'; if (AT->getNumElements()) { Out << ' '; @@ -1054,7 +1056,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { printConstantVector(CV, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - const VectorType *VT = cast<VectorType>(CPV->getType()); + VectorType *VT = cast<VectorType>(CPV->getType()); Out << "{ "; Constant *CZ = Constant::getNullValue(VT->getElementType()); printConstant(CZ, Static); @@ -1074,7 +1076,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ")"; } if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { - const StructType *ST = cast<StructType>(CPV->getType()); + StructType *ST = cast<StructType>(CPV->getType()); Out << '{'; if (ST->getNumElements()) { Out << ' '; @@ -1123,7 +1125,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { // care of detecting that case and printing the cast for the ConstantExpr. 
bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { bool NeedsExplicitCast = false; - const Type *Ty = CE->getOperand(0)->getType(); + Type *Ty = CE->getOperand(0)->getType(); bool TypeIsSigned = false; switch (CE->getOpcode()) { case Instruction::Add: @@ -1175,7 +1177,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { // Extract the operand's type, we'll need it. - const Type* OpTy = CPV->getType(); + Type* OpTy = CPV->getType(); // Indicate whether to do the cast or not. bool shouldCast = false; @@ -1267,7 +1269,7 @@ std::string CWriter::GetValueName(const Value *Operand) { void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && @@ -1330,7 +1332,7 @@ void CWriter::writeOperand(Value *Operand, bool Static) { // This function takes care of detecting that case and printing the cast // for the Instruction. bool CWriter::writeInstructionCast(const Instruction &I) { - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); switch (I.getOpcode()) { case Instruction::Add: case Instruction::Sub: @@ -1362,7 +1364,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) { void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { // Extract the operand's type, we'll need it. - const Type* OpTy = Operand->getType(); + Type* OpTy = Operand->getType(); // Indicate whether to do the cast or not. bool shouldCast = false; @@ -1430,7 +1432,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { bool castIsSigned = Cmp.isSigned(); // If the operand was a pointer, convert to a large integer type. - const Type* OpTy = Operand->getType(); + Type* OpTy = Operand->getType(); if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); @@ -1665,7 +1667,8 @@ bool CWriter::doInitialization(Module &M) { TAsm = Match->createMCAsmInfo(Triple); #endif TAsm = new CBEMCAsmInfo(); - TCtx = new MCContext(*TAsm, NULL); + MRI = new MCRegisterInfo(); + TCtx = new MCContext(*TAsm, *MRI, NULL); Mang = new Mangler(*TCtx, *TD); // Keep track of which functions are static ctors/dtors so they can have @@ -2049,7 +2052,7 @@ void CWriter::printModuleTypes() { for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *ST = StructTypes[i]; - if (ST->isAnonymous() || ST->getName().empty()) + if (ST->isLiteral() || ST->getName().empty()) UnnamedStructIDs[ST] = NextTypeID++; std::string Name = getStructName(ST); @@ -2060,7 +2063,7 @@ void CWriter::printModuleTypes() { Out << '\n'; // Keep track of which structures have been printed so far. - SmallPtrSet<const Type *, 16> StructPrinted; + SmallPtrSet<Type *, 16> StructPrinted; // Loop over all structures then push them into the stack so they are // printed in the correct order. @@ -2077,8 +2080,8 @@ void CWriter::printModuleTypes() { // // TODO: Make this work properly with vector types // -void CWriter::printContainedStructs(const Type *Ty, - SmallPtrSet<const Type *, 16> &StructPrinted) { +void CWriter::printContainedStructs(Type *Ty, + SmallPtrSet<Type *, 16> &StructPrinted) { // Don't walk through pointers. 
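A smaller consequence of the same MC reorganisation shows up in the CWriter hunks above: MCContext now requires an MCRegisterInfo alongside the MCAsmInfo (plus an optional MCObjectFileInfo), which is why the writer gains MRI and MOFI members. The construction sequence, as a sketch condensed from the hunk:

  const MCAsmInfo *TAsm = new CBEMCAsmInfo();
  MCRegisterInfo  *MRI  = new MCRegisterInfo();
  MCContext       *TCtx = new MCContext(*TAsm, *MRI, NULL);  // third argument (MCObjectFileInfo) is new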
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) return; @@ -2088,7 +2091,7 @@ void CWriter::printContainedStructs(const Type *Ty, E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (const StructType *ST = dyn_cast<StructType>(Ty)) { + if (StructType *ST = dyn_cast<StructType>(Ty)) { // Check to see if we have already printed this struct. if (!StructPrinted.insert(Ty)) return; @@ -2120,7 +2123,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { } // Loop over the arguments, printing them... - const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); + FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); std::string tstr; @@ -2150,7 +2153,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { ArgName = GetValueName(I); else ArgName = ""; - const Type *ArgTy = I->getType(); + Type *ArgTy = I->getType(); if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { ArgTy = cast<PointerType>(ArgTy)->getElementType(); ByValParams.insert(I); @@ -2177,7 +2180,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { for (; I != E; ++I) { if (PrintedArg) FunctionInnards << ", "; - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -2205,7 +2208,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { FunctionInnards << ')'; // Get the return tpe for the function. - const Type *RetTy; + Type *RetTy; if (!isStructReturn) RetTy = F->getReturnType(); else { @@ -2222,8 +2225,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { static inline bool isFPIntBitCast(const Instruction &I) { if (!isa<BitCastInst>(I)) return false; - const Type *SrcTy = I.getOperand(0)->getType(); - const Type *DstTy = I.getType(); + Type *SrcTy = I.getOperand(0)->getType(); + Type *DstTy = I.getType(); return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } @@ -2237,7 +2240,7 @@ void CWriter::printFunction(Function &F) { // If this is a struct return function, handle the result with magic. if (isStructReturn) { - const Type *StructTy = + Type *StructTy = cast<PointerType>(F.arg_begin()->getType())->getElementType(); Out << " "; printType(Out, StructTy, false, "StructReturn"); @@ -2380,22 +2383,29 @@ void CWriter::visitReturnInst(ReturnInst &I) { void CWriter::visitSwitchInst(SwitchInst &SI) { + Value* Cond = SI.getCondition(); + Out << " switch ("; - writeOperand(SI.getOperand(0)); + writeOperand(Cond); Out << ") {\n default:\n"; printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); Out << ";\n"; - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) { + + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. 
+ for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + BasicBlock* Succ = SI.getSuccessor(i); Out << " case "; - writeOperand(SI.getOperand(i)); + writeOperand(CaseVal); Out << ":\n"; - BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1)); printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } + Out << " }\n"; } @@ -2656,7 +2666,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) { Out << ")"; } -static const char * getFloatBitCastField(const Type *Ty) { +static const char * getFloatBitCastField(Type *Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Invalid Type"); case Type::FloatTyID: return "Float"; @@ -2672,8 +2682,8 @@ static const char * getFloatBitCastField(const Type *Ty) { } void CWriter::visitCastInst(CastInst &I) { - const Type *DstTy = I.getType(); - const Type *SrcTy = I.getOperand(0)->getType(); + Type *DstTy = I.getType(); + Type *SrcTy = I.getOperand(0)->getType(); if (isFPIntBitCast(I)) { Out << '('; // These int<->float and long<->double casts need to be handled specially @@ -2719,7 +2729,7 @@ void CWriter::visitSelectInst(SelectInst &I) { // Returns the macro name or value of the max or min of an integer type // (as defined in limits.h). -static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax, +static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax, raw_ostream &Out) { const char* type; const char* sprefix = ""; @@ -2745,16 +2755,16 @@ static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax, } #ifndef NDEBUG -static bool isSupportedIntegerSize(const IntegerType &T) { +static bool isSupportedIntegerSize(IntegerType &T) { return T.getBitWidth() == 8 || T.getBitWidth() == 16 || T.getBitWidth() == 32 || T.getBitWidth() == 64; } #endif void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) { - const FunctionType *funT = F.getFunctionType(); - const Type *retT = F.getReturnType(); - const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); + FunctionType *funT = F.getFunctionType(); + Type *retT = F.getReturnType(); + IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); assert(isSupportedIntegerSize(*elemT) && "CBackend does not support arbitrary size integers."); @@ -2829,7 +2839,6 @@ void CWriter::lowerIntrinsics(Function &F) { if (Function *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { case Intrinsic::not_intrinsic: - case Intrinsic::memory_barrier: case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: @@ -2908,8 +2917,8 @@ void CWriter::visitCallInst(CallInst &I) { Value *Callee = I.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); // If this is a call to a struct-return function, assign to the first // parameter instead of passing it to the call. 
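The visitSwitchInst rewrite above stops walking the instruction's raw operand list in pairs and instead uses the SwitchInst accessors of this LLVM revision, where case 0 is the default. The resulting iteration idiom, sketched on its own (SI is any SwitchInst; the loop body is illustrative):

  unsigned NumCases = SI.getNumCases();      // count includes the default at index 0
  for (unsigned i = 1; i < NumCases; ++i) {  // so explicit cases start at 1
    ConstantInt *CaseVal = SI.getCaseValue(i);
    BasicBlock  *Succ    = SI.getSuccessor(i);
    // ... handle one (case value, successor) pair ...
  }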
@@ -3020,9 +3029,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, WroteCallee = true; return false; } - case Intrinsic::memory_barrier: - Out << "__sync_synchronize()"; - return true; case Intrinsic::vastart: Out << "0; "; @@ -3217,7 +3223,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { std::vector<std::pair<Value*, int> > ResultVals; if (CI.getType() == Type::getVoidTy(CI.getContext())) ; - else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) { + else if (StructType *ST = dyn_cast<StructType>(CI.getType())) { for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) ResultVals.push_back(std::make_pair(&CI, (int)i)); } else { @@ -3348,7 +3354,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, // Find out if the last index is into a vector. If so, we have to print this // specially. Since vectors can't have elements of indexable type, only the // last index could possibly be of a vector element. - const VectorType *LastIndexIsVector = 0; + VectorType *LastIndexIsVector = 0; { for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) LastIndexIsVector = dyn_cast<VectorType>(*TmpI); @@ -3421,7 +3427,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, Out << ")"; } -void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType, +void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, bool IsVolatile, unsigned Alignment) { bool IsUnaligned = Alignment && @@ -3463,7 +3469,7 @@ void CWriter::visitStoreInst(StoreInst &I) { Out << " = "; Value *Operand = I.getOperand(0); Constant *BitMask = 0; - if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) + if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) if (!ITy->isPowerOf2ByteWidth()) // We have a bit width that doesn't match an even power-of-2 byte // size. Consequently we must & the value with the type's bit mask @@ -3492,7 +3498,7 @@ void CWriter::visitVAArgInst(VAArgInst &I) { } void CWriter::visitInsertElementInst(InsertElementInst &I) { - const Type *EltTy = I.getType()->getElementType(); + Type *EltTy = I.getType()->getElementType(); writeOperand(I.getOperand(0)); Out << ";\n "; Out << "(("; @@ -3507,7 +3513,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) { void CWriter::visitExtractElementInst(ExtractElementInst &I) { // We know that our operand is not inlined. 
Out << "(("; - const Type *EltTy = + Type *EltTy = cast<VectorType>(I.getOperand(0)->getType())->getElementType(); printType(Out, PointerType::getUnqual(EltTy)); Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; @@ -3519,9 +3525,9 @@ void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Out << "("; printType(Out, SVI.getType()); Out << "){ "; - const VectorType *VT = SVI.getType(); + VectorType *VT = SVI.getType(); unsigned NumElts = VT->getNumElements(); - const Type *EltTy = VT->getElementType(); + Type *EltTy = VT->getElementType(); for (unsigned i = 0; i != NumElts; ++i) { if (i) Out << ", "; @@ -3557,9 +3563,9 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { Out << GetValueName(&IVI); for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); i != e; ++i) { - const Type *IndexedTy = + Type *IndexedTy = ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - ArrayRef<unsigned>(b, i+1)); + makeArrayRef(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else @@ -3579,9 +3585,9 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { Out << GetValueName(EVI.getOperand(0)); for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); i != e; ++i) { - const Type *IndexedTy = + Type *IndexedTy = ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - ArrayRef<unsigned>(b, i+1)); + makeArrayRef(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h index e64216b..4f1ca97 100644 --- a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h +++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h @@ -20,8 +20,9 @@ namespace llvm { struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) + CTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp index f7e8ff2..e8274ff 100644 --- a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp @@ -9,7 +9,7 @@ #include "CTargetMachine.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCBackendTarget; @@ -17,3 +17,5 @@ Target llvm::TheCBackendTarget; extern "C" void LLVMInitializeCBackendTargetInfo() { RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); } + +extern "C" void LLVMInitializeCBackendTargetMC() {} diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 85fb258..0000000 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMCellSPUDesc - SPUMCTargetDesc.cpp - SPUMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile deleted file mode 100644 index 10d9a42..0000000 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile 
--------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp index 26c5a4b..d5af2a8 100644 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "SPUMCTargetDesc.h" #include "SPUMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SPUGenInstrInfo.inc" @@ -35,8 +37,10 @@ static MCInstrInfo *createSPUMCInstrInfo() { return X; } -extern "C" void LLVMInitializeCellSPUMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); +static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSPUMCRegisterInfo(X, SPU::R0); + return X; } static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,11 +50,43 @@ static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, - createSPUMCSubtargetInfo); +static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT); + + // Initial state of the frame pointer is R1. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(SPU::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + // For the time being, use static relocations, since there's really no + // support for PIC yet. + X->InitMCCodeGenInfo(Reloc::Static, CM); + return X; } -extern "C" void LLVMInitializeCellSPUMCAsmInfo() { - RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget); +// Force static initialization. +extern "C" void LLVMInitializeCellSPUTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget, + createSPUMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget, + createCellSPUMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, + createSPUMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h index c5c037d..a3717b0 100644 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- SPUMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file provides Alpha specific target descriptions. +// This file provides CellSPU specific target descriptions. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp index fd96694..90b5270 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -29,10 +29,10 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp index a3e7e73..093f99f 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -181,18 +181,6 @@ void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { MachineLocation FPSrc(MachineLocation::VirtualFP); Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); } - } else { - // This is a leaf function -- insert a branch hint iff there are - // sufficient number instructions in the basic block. Note that - // this is just a best guess based on the basic block's size. - if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - dl = MBBI->getDebugLoc(); - - // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) - .addSym(MMI.getContext().CreateTempSymbol()); - } } } @@ -249,14 +237,6 @@ void SPUFrameLowering::emitEpilogue(MachineFunction &MF, } } -void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(SPU::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const{ // Mark LR and SP unused, since the prolog spills them to stack and diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h index 4fee72d..b837f2c 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h @@ -43,9 +43,6 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - //! Perform target-specific stack frame setup. 
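The CellSPU hunks above also show where the initial call-frame state moved: SPUFrameLowering::getInitialFrameState() is deleted and the same information is now attached to the MCAsmInfo when it is created (and registered through RegisterMCAsmInfoFn). A minimal sketch of the relocated hook; FooMCAsmInfo and Foo::SP are illustrative stand-ins for the target's asm-info class and entry stack pointer (the CellSPU version uses SPULinuxMCAsmInfo and SPU::R1):

#include "llvm/MC/MachineLocation.h"

static MCAsmInfo *createFooMCAsmInfo(const Target &T, StringRef TT) {
  MCAsmInfo *MAI = new FooMCAsmInfo(T, TT);
  // Initial state of the frame pointer: virtual FP defined by the entry SP.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(Foo::SP, 0);
  MAI->addInitialFrameState(0, Dst, Src);   // label 0 = function entry
  return MAI;
}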
- void getInitialFrameState(std::vector<MachineMove> &Moves) const; - //! Return a function's saved spill slots /*! For CellSPU, a function's saved spill slots is just the link register. @@ -77,17 +74,6 @@ namespace llvm { static int FItoStackOffset(int frame_index) { return frame_index * stackSlotSize(); } - //! Number of instructions required to overcome hint-for-branch latency - /*! - HBR (hint-for-branch) instructions can be inserted when, for example, - we know that a given function is going to be called, such as printf(), - in the control flow graph. HBRs are only inserted if a sufficient number - of instructions occurs between the HBR and the target. Currently, HBRs - take 6 cycles, ergo, the magic number 6. - */ - static int branchHintPenalty() { - return 6; - } }; } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp index f0ceee2..ac33111 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp @@ -69,7 +69,7 @@ namespace { TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { EVT ArgVT = Op.getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; @@ -80,7 +80,7 @@ namespace { TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = + Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -174,6 +174,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); // SPU has no division/remainder instructions setOperationAction(ISD::SREM, MVT::i8, Expand); @@ -401,6 +402,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Set operation actions to legal types only. + if (!isTypeLegal(VT)) continue; + // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); @@ -438,6 +442,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? setStackPointerRegisterToSaveRestore(SPU::R1); @@ -497,7 +502,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const // Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// -MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { +EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { // i8, i16 and i32 are valid SETCC result types MVT::SimpleValueType retval; @@ -2727,6 +2732,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) // the type to extend from needs to be i64 or i32. 
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && "LowerSIGN_EXTEND: input and/or output operand have wrong size"); + (void)OpVT; // Create shuffle mask unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 @@ -3216,7 +3222,7 @@ SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode. bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, - const Type *Ty) const { + Type *Ty) const { // SPU's addresses are 256K: return (V > -(1 << 18) && V < (1 << 18) - 1); } @@ -3239,7 +3245,7 @@ bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { bool SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type * ) const{ + Type * ) const{ // A-form: 18bit absolute address. if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h index d23f6cc..aa4a168 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h @@ -107,7 +107,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ValueType for ISD::SETCC - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } @@ -147,7 +147,7 @@ namespace llvm { /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode. - virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; virtual bool isLegalAddressImmediate(GlobalValue *) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -179,7 +179,7 @@ namespace llvm { virtual bool isLegalICmpImmediate(int64_t Imm) const; virtual bool isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const; + Type *Ty) const; }; } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp index e67b10c..007bc0e 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -17,9 +17,9 @@ #include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR @@ -290,6 +290,8 @@ static void removeHBR( MachineBasicBlock &MBB) { if (I->getOpcode() == SPU::HBRA || I->getOpcode() == SPU::HBR_LABEL){ I=MBB.erase(I); + if (I == MBB.end()) + break; } } } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td index e103c9b..f76ebd7 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td @@ -1594,8 +1594,8 @@ multiclass BitwiseOrImm { def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>; - def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val), - [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>; + def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; // i16i32: hacked version of 
the ori instruction to extend 16-bit quantities // to 32-bit quantities. used exclusively to match "anyext" conversions (vide @@ -3467,8 +3467,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { [/* no pattern */]>; // Indirect branch - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + let isIndirectBranch = 1 in { + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + } } // Conditional branches: diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp index 19896c0..bbac6fd 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" @@ -187,7 +186,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii) : - SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii) + SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii) { } @@ -311,28 +310,12 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } unsigned -SPURegisterInfo::getRARegister() const -{ - return SPU::R0; -} - -unsigned SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SPU::R1; } int -SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); -} - -int SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const { switch(dFormOpcode) diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h index 5e014f8..b7818a4 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h @@ -74,8 +74,6 @@ namespace llvm { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - //! Get return address register (LR, aka R0) - unsigned getRARegister() const; //! Get the stack frame register (SP, aka R1) unsigned getFrameRegister(const MachineFunction &MF) const; @@ -83,10 +81,6 @@ namespace llvm { // New methods added: //------------------------------------------------------------------------ - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - //! Convert D-form load/store to X-form load/store /*! 
Converts a regiser displacement load/store into a register-indexed diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp index 856dc82..43335ab 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp @@ -14,7 +14,7 @@ #include "SPUSubtarget.h" #include "SPU.h" #include "SPURegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp index 3542a2b..93a7f6e 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -16,7 +16,8 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -31,9 +32,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { return &LR[0]; } -SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU,const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), @@ -41,9 +43,6 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - // For the time being, use static relocations, since there's really no - // support for PIC yet. 
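The target-machine constructor hunks in this import (Blackfin above, CellSPU here) all take the same shape: the triple/CPU/feature strings become StringRefs, the relocation and code models arrive as explicit parameters forwarded to LLVMTargetMachine, and per-target calls such as the setRelocationModel(Reloc::Static) removed just below disappear because the model is now chosen in the MCCodeGenInfo factory. A minimal sketch of the new signature for a hypothetical FooTargetMachine (member initializers beyond the subtarget are elided):

FooTargetMachine::FooTargetMachine(const Target &T, StringRef TT,
                                   StringRef CPU, StringRef FS,
                                   Reloc::Model RM, CodeModel::Model CM)
  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),  // models forwarded, not set afterwards
    Subtarget(TT, CPU, FS) {
  // remaining per-target members initialized as before
}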
- setRelocationModel(Reloc::Static); } //===----------------------------------------------------------------------===// @@ -59,8 +58,16 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, // passes to run just before printing the assembly bool SPUTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) -{ +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + // load the TCE instruction scheduler, if available via + // loaded plugins + typedef llvm::FunctionPass* (*BuilderFunc)(const char*); + BuilderFunc schedulerCreator = + (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( + "createTCESchedulerPass"); + if (schedulerCreator != NULL) + PM.add(schedulerCreator("cellspu")); + //align instructions with nops/lnops for dual issue PM.add(createSPUNopFillerPass(*this)); return true; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h index d96f86d..fffe77c 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h @@ -38,8 +38,9 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: - SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp index 049ea23..84aadfa 100644 --- a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp @@ -9,7 +9,7 @@ #include "SPU.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCellSPUTarget; diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp index 10d18f6..ae0e3c4 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp +++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp @@ -29,7 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include <algorithm> @@ -77,22 +77,12 @@ extern "C" void LLVMInitializeCppBackendTarget() { RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget); } -extern "C" void LLVMInitializeCppBackendMCAsmInfo() {} - -extern "C" void LLVMInitializeCppBackendMCInstrInfo() { - RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget); -} - -extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() { - RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget); -} - namespace { - typedef std::vector<const Type*> TypeList; - typedef std::map<const Type*,std::string> TypeMap; + typedef std::vector<Type*> TypeList; + typedef std::map<Type*,std::string> TypeMap; typedef std::map<const Value*,std::string> ValueMap; typedef std::set<std::string> NameSet; - typedef std::set<const Type*> TypeSet; + typedef std::set<Type*> TypeSet; typedef std::set<const Value*> ValueSet; typedef std::map<const 
Value*,std::string> ForwardRefMap; @@ -143,14 +133,14 @@ namespace { void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); - std::string getCppName(const Type* val); - inline void printCppName(const Type* val); + std::string getCppName(Type* val); + inline void printCppName(Type* val); std::string getCppName(const Value* val); inline void printCppName(const Value* val); void printAttributes(const AttrListPtr &PAL, const std::string &name); - void printType(const Type* Ty); + void printType(Type* Ty); void printTypes(const Module* M); void printConstant(const Constant *CPV); @@ -164,7 +154,7 @@ namespace { void printFunctionHead(const Function *F); void printFunctionBody(const Function *F); void printInstruction(const Instruction *I, const std::string& bbname); - std::string getOpName(Value*); + std::string getOpName(const Value*); void printModuleBody(); }; @@ -184,7 +174,7 @@ static inline void sanitize(std::string &str) { str[i] = '_'; } -static std::string getTypePrefix(const Type *Ty) { +static std::string getTypePrefix(Type *Ty) { switch (Ty->getTypeID()) { case Type::VoidTyID: return "void_"; case Type::IntegerTyID: @@ -339,7 +329,7 @@ void CppWriter::printEscapedString(const std::string &Str) { } } -std::string CppWriter::getCppName(const Type* Ty) { +std::string CppWriter::getCppName(Type* Ty) { // First, handle the primitive types .. easy if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { @@ -379,7 +369,7 @@ std::string CppWriter::getCppName(const Type* Ty) { // See if the type has a name in the symboltable and build accordingly std::string name; - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (STy->hasName()) name = STy->getName(); @@ -393,7 +383,7 @@ std::string CppWriter::getCppName(const Type* Ty) { return TypeNames[Ty] = name; } -void CppWriter::printCppName(const Type* Ty) { +void CppWriter::printCppName(Type* Ty) { printEscapedString(getCppName(Ty)); } @@ -480,6 +470,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(NoImplicitFloat); HANDLE_ATTR(Naked); HANDLE_ATTR(InlineHint); + HANDLE_ATTR(ReturnsTwice); + HANDLE_ATTR(UWTable); + HANDLE_ATTR(NonLazyBind); #undef HANDLE_ATTR if (attrs & Attribute::StackAlignment) Out << " | Attribute::constructStackAlignmentFromInt(" @@ -499,7 +492,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, } } -void CppWriter::printType(const Type* Ty) { +void CppWriter::printType(Type* Ty) { // We don't print definitions for primitive types if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return; @@ -514,13 +507,13 @@ void CppWriter::printType(const Type* Ty) { // Print the type definition switch (Ty->getTypeID()) { case Type::FunctionTyID: { - const FunctionType* FT = cast<FunctionType>(Ty); + FunctionType* FT = cast<FunctionType>(Ty); Out << "std::vector<Type*>" << typeName << "_args;"; nl(Out); FunctionType::param_iterator PI = FT->param_begin(); FunctionType::param_iterator PE = FT->param_end(); for (; PI != PE; ++PI) { - const Type* argTy = static_cast<const Type*>(*PI); + Type* argTy = static_cast<Type*>(*PI); printType(argTy); std::string argName(getCppName(argTy)); Out << typeName << "_args.push_back(" << argName; @@ -539,13 +532,21 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::StructTyID: { - const StructType* ST = cast<StructType>(Ty); - if (!ST->isAnonymous()) { - Out << "StructType *" << typeName << " = "; - Out << "StructType::createNamed(mod->getContext(), \""; + 
StructType* ST = cast<StructType>(Ty); + if (!ST->isLiteral()) { + Out << "StructType *" << typeName << " = mod->getTypeByName(\""; + printEscapedString(ST->getName()); + Out << "\");"; + nl(Out); + Out << "if (!" << typeName << ") {"; + nl(Out); + Out << typeName << " = "; + Out << "StructType::create(mod->getContext(), \""; printEscapedString(ST->getName()); Out << "\");"; nl(Out); + Out << "}"; + nl(Out); // Indicate that this type is now defined. DefinedTypes.insert(Ty); } @@ -555,7 +556,7 @@ void CppWriter::printType(const Type* Ty) { StructType::element_iterator EI = ST->element_begin(); StructType::element_iterator EE = ST->element_end(); for (; EI != EE; ++EI) { - const Type* fieldTy = static_cast<const Type*>(*EI); + Type* fieldTy = static_cast<Type*>(*EI); printType(fieldTy); std::string fieldName(getCppName(fieldTy)); Out << typeName << "_fields.push_back(" << fieldName; @@ -563,21 +564,27 @@ void CppWriter::printType(const Type* Ty) { nl(Out); } - if (ST->isAnonymous()) { + if (ST->isLiteral()) { Out << "StructType *" << typeName << " = "; Out << "StructType::get(" << "mod->getContext(), "; } else { + Out << "if (" << typeName << "->isOpaque()) {"; + nl(Out); Out << typeName << "->setBody("; } Out << typeName << "_fields, /*isPacked=*/" << (ST->isPacked() ? "true" : "false") << ");"; nl(Out); + if (!ST->isLiteral()) { + Out << "}"; + nl(Out); + } break; } case Type::ArrayTyID: { - const ArrayType* AT = cast<ArrayType>(Ty); - const Type* ET = AT->getElementType(); + ArrayType* AT = cast<ArrayType>(Ty); + Type* ET = AT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -589,8 +596,8 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::PointerTyID: { - const PointerType* PT = cast<PointerType>(Ty); - const Type* ET = PT->getElementType(); + PointerType* PT = cast<PointerType>(Ty); + Type* ET = PT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -602,8 +609,8 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::VectorTyID: { - const VectorType* PT = cast<VectorType>(Ty); - const Type* ET = PT->getElementType(); + VectorType* PT = cast<VectorType>(Ty); + Type* ET = PT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -766,9 +773,7 @@ void CppWriter::printConstant(const Constant *CV) { Out << "Constant* " << constName << " = ConstantExpr::getGetElementPtr(" << getCppName(CE->getOperand(0)) << ", " - << "&" << constName << "_indices[0], " - << constName << "_indices.size()" - << ");"; + << constName << "_indices);"; } else if (CE->isCast()) { printConstant(CE->getOperand(0)); Out << "Constant* " << constName << " = ConstantExpr::getCast("; @@ -988,7 +993,7 @@ void CppWriter::printVariableBody(const GlobalVariable *GV) { } } -std::string CppWriter::getOpName(Value* V) { +std::string CppWriter::getOpName(const Value* V) { if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) return getCppName(V); @@ -1053,14 +1058,17 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::Switch: { const SwitchInst *SI = cast<SwitchInst>(I); Out << "SwitchInst* " << iName << " = SwitchInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " + << getOpName(SI->getCondition()) << ", " + << getOpName(SI->getDefaultDest()) << ", " << SI->getNumCases() << ", " << bbname << ");"; nl(Out); - for (unsigned i = 2; i 
!= SI->getNumOperands(); i += 2) { + unsigned NumCases = SI->getNumCases(); + for (unsigned i = 1; i < NumCases; ++i) { + const ConstantInt* CaseVal = SI->getCaseValue(i); + const BasicBlock* BB = SI->getSuccessor(i); Out << iName << "->addCase(" - << opNames[i] << ", " - << opNames[i+1] << ");"; + << getOpName(CaseVal) << ", " + << getOpName(BB) << ");"; nl(Out); } break; @@ -1076,6 +1084,11 @@ void CppWriter::printInstruction(const Instruction *I, } break; } + case Instruction::Resume: { + Out << "ResumeInst::Create(mod->getContext(), " << opNames[0] + << ", " << bbname << ");"; + break; + } case Instruction::Invoke: { const InvokeInst* inv = cast<InvokeInst>(I); Out << "std::vector<Value*> " << iName << "_params;"; @@ -1090,8 +1103,7 @@ void CppWriter::printInstruction(const Instruction *I, << getOpName(inv->getCalledFunction()) << ", " << getOpName(inv->getNormalDest()) << ", " << getOpName(inv->getUnwindDest()) << ", " - << iName << "_params.begin(), " - << iName << "_params.end(), \""; + << iName << "_params, \""; printEscapedString(inv->getName()); Out << "\", " << bbname << ");"; nl(Out) << iName << "->setCallingConv("; @@ -1252,8 +1264,7 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); } Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices.begin(), " - << iName << "_indices.end()"; + << opNames[0] << ", " << iName << "_indices"; } Out << ", \""; printEscapedString(gep->getName()); @@ -1304,7 +1315,7 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::PtrToInt: Out << "PtrToIntInst"; break; case Instruction::IntToPtr: Out << "IntToPtrInst"; break; case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(!"Unreachable"); break; + default: assert(0 && "Unreachable"); break; } Out << "(" << opNames[0] << ", " << getCppName(cst->getType()) << ", \""; @@ -1331,8 +1342,7 @@ void CppWriter::printInstruction(const Instruction *I, } Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[call->getNumArgOperands()] << ", " - << iName << "_params.begin(), " - << iName << "_params.end(), \""; + << iName << "_params, \""; } else if (call->getNumArgOperands() == 1) { Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; @@ -1415,7 +1425,7 @@ void CppWriter::printInstruction(const Instruction *I, Out << "ExtractValueInst* " << getCppName(evi) << " = ExtractValueInst::Create(" << opNames[0] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + << iName << "_indices, \""; printEscapedString(evi->getName()); Out << "\", " << bbname << ");"; break; @@ -1432,7 +1442,7 @@ void CppWriter::printInstruction(const Instruction *I, Out << "InsertValueInst* " << getCppName(ivi) << " = InsertValueInst::Create(" << opNames[0] << ", " << opNames[1] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + << iName << "_indices, \""; printEscapedString(ivi->getName()); Out << "\", " << bbname << ");"; break; @@ -1542,13 +1552,12 @@ void CppWriter::printFunctionUses(const Function* F) { void CppWriter::printFunctionHead(const Function* F) { nl(Out) << "Function* " << getCppName(F); - if (is_inline) { - Out << " = mod->getFunction(\""; - printEscapedString(F->getName()); - Out << "\", " << getCppName(F->getFunctionType()) << ");"; - nl(Out) << "if (!" 
<< getCppName(F) << ") {"; - nl(Out) << getCppName(F); - } + Out << " = mod->getFunction(\""; + printEscapedString(F->getName()); + Out << "\");"; + nl(Out) << "if (!" << getCppName(F) << ") {"; + nl(Out) << getCppName(F); + Out<< " = Function::Create("; nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ","; nl(Out) << "/*Linkage=*/"; @@ -1585,10 +1594,8 @@ void CppWriter::printFunctionHead(const Function* F) { Out << "->setGC(\"" << F->getGC() << "\");"; nl(Out); } - if (is_inline) { - Out << "}"; - nl(Out); - } + Out << "}"; + nl(Out); printAttributes(F->getAttributes(), getCppName(F)); printCppName(F); Out << "->setAttributes(" << getCppName(F) << "_PAL);"; @@ -1873,7 +1880,7 @@ void CppWriter::printVariable(const std::string& fname, void CppWriter::printType(const std::string &fname, const std::string &typeName) { - const Type* Ty = TheModule->getTypeByName(typeName); + Type* Ty = TheModule->getTypeByName(typeName); if (!Ty) { error(std::string("Type '") + typeName + "' not found in input module"); return; diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h index 7322e3e..287e537 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h +++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h @@ -22,8 +22,9 @@ namespace llvm { class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { - CPPTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) + CPPTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp index d0aeb12..a8ac0a2 100644 --- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp @@ -9,7 +9,7 @@ #include "CPPTargetMachine.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCppBackendTarget; @@ -24,3 +24,5 @@ extern "C" void LLVMInitializeCppBackendTargetInfo() { "C++ backend", &CppBackend_TripleMatchQuality); } + +extern "C" void LLVMInitializeCppBackendTargetMC() {} diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt deleted file mode 100644 index 87e7cb5..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMBlazeAsmParser - MBlazeAsmLexer.cpp - MBlazeAsmParser.cpp - ) - diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 1596596..2d357bb 100644 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -7,8 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" -#include "MBlazeTargetMachine.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -17,10 +16,10 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include <string> #include <map> @@ -29,7 +28,7 @@ using namespace llvm; namespace { - class MBlazeBaseAsmLexer : public TargetAsmLexer { + class MBlazeBaseAsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; const AsmToken &lexDefinite() { @@ -42,7 +41,7 @@ namespace { rmap_ty RegisterMap; - void InitRegisterMap(const TargetRegisterInfo *info) { + void InitRegisterMap(const MCRegisterInfo *info) { unsigned numRegs = info->getNumRegs(); for (unsigned i = 0; i < numRegs; ++i) { @@ -76,20 +75,16 @@ namespace { } public: MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : MCTargetAsmLexer(T), AsmInfo(MAI) { } }; class MBlazeAsmLexer : public MBlazeBaseAsmLexer { public: - MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI) + MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI, + const MCAsmInfo &MAI) : MBlazeBaseAsmLexer(T, MAI) { - std::string tripleString("mblaze-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; } @@ -123,6 +118,6 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() { } extern "C" void LLVMInitializeMBlazeAsmLexer() { - RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); + RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); } diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index eebd9d8..97d311c 100644 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -7,19 +7,16 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" -#include "MBlazeSubtarget.h" -#include "MBlazeRegisterInfo.h" -#include "MBlazeISelLowering.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ 
-30,7 +27,7 @@ using namespace llvm; namespace { struct MBlazeOperand; -class MBlazeAsmParser : public TargetAsmParser { +class MBlazeAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; MCAsmParser &getParser() const { return Parser; } @@ -64,7 +61,7 @@ class MBlazeAsmParser : public TargetAsmParser { public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : TargetAsmParser(), Parser(_Parser) {} + : MCTargetAsmParser(), Parser(_Parser) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -286,19 +283,19 @@ void MBlazeOperand::print(raw_ostream &OS) const { break; case Register: OS << "<register R"; - OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">"; + OS << getMBlazeRegisterNumbering(getReg()) << ">"; break; case Token: OS << "'" << getToken() << "'"; break; case Memory: { OS << "<memory R"; - OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase()); + OS << getMBlazeRegisterNumbering(getMemBase()); OS << ", "; unsigned RegOff = getMemOffReg(); if (RegOff) - OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff); + OS << "R" << getMBlazeRegisterNumbering(RegOff); else OS << getMemOff(); OS << ">"; @@ -326,6 +323,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned ErrorInfo; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + default: break; case Match_Success: Out.EmitInstruction(Inst); return false; @@ -521,7 +519,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, return false; } -/// ParseDirective parses the arm specific directives +/// ParseDirective parses the MBlaze specific directives bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") @@ -558,7 +556,7 @@ extern "C" void LLVMInitializeMBlazeAsmLexer(); /// Force static initialization. extern "C" void LLVMInitializeMBlazeAsmParser() { - RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget); + RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget); LLVMInitializeMBlazeAsmLexer(); } diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile deleted file mode 100644 index 611a0f4..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmParser - -# Hack: we need to include 'main' MBlaze target directory for private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt deleted file mode 100644 index 9376e68..0000000 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMBlazeDisassembler - MBlazeDisassembler.cpp - ) - -# workaround for hanging compilation on MSVC9 and 10 -if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -set_property( - SOURCE MBlazeDisassembler.cpp - PROPERTY COMPILE_FLAGS "/Od" - ) -endif() - -add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 88d80a1..fd761f1 100644 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -20,9 +20,9 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" // #include "MBlazeGenDecoderTables.inc" @@ -60,27 +60,27 @@ static unsigned mblazeBinary2Opcode[] = { }; static unsigned getRD(uint32_t insn) { - if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F)) + if (!isMBlazeRegister((insn>>21)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>21)&0x1F); } static unsigned getRA(uint32_t insn) { - if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F)) + if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>16)&0x1F); } static unsigned getRB(uint32_t insn) { - if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F)) + if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>11)&0x1F); } static int64_t getRS(uint32_t insn) { - if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF)) + if (!isSpecialMBlazeRegister(insn&0x3FFF)) return UNSUPPORTED; - return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF); + return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF); } static int64_t getIMM(uint32_t insn) { @@ -493,11 +493,12 @@ EDInstInfo *MBlazeDisassembler::getEDInfo() const { // Public interface for the disassembler // -bool MBlazeDisassembler::getInstruction(MCInst &instr, +MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, uint64_t &size, const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream) const { + raw_ostream &vStream, + raw_ostream &cStream) const { // The machine instruction. uint32_t insn; uint64_t read; @@ -508,7 +509,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // We want to read exactly 4 bytes of data. if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4) - return false; + return Fail; // Encoded as a big-endian 32-bit word in the stream. insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0); @@ -517,7 +518,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // that it is a valid instruction. 
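The disassembler hunks in this file are mechanical: getInstruction now returns MCDisassembler::DecodeStatus and takes an extra comment stream, so every return false becomes return Fail and the success path returns Success. A minimal sketch of a decoder written against that interface, assuming the 3.0-era MCDisassembler base class (the fixed 4-byte decode and the opcode handling are illustrative):

#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Hedged sketch of a decoder using the DecodeStatus-based interface.
class ToyDisassembler : public llvm::MCDisassembler {
public:
  explicit ToyDisassembler(const llvm::MCSubtargetInfo &STI)
      : llvm::MCDisassembler(STI) {}

  virtual DecodeStatus getInstruction(llvm::MCInst &Inst, uint64_t &Size,
                                      const llvm::MemoryObject &Region,
                                      uint64_t Address,
                                      llvm::raw_ostream &VStream,
                                      llvm::raw_ostream &CStream) const {
    uint8_t Bytes[4];
    uint64_t Read;
    // Report Fail (instead of the old 'return false') when the bytes are short.
    if (Region.readBytes(Address, 4, Bytes, &Read) == -1 || Read < 4)
      return Fail;
    Size = 4;          // four bytes are consumed on success
    Inst.setOpcode(0); // illustrative: a real decoder maps instruction bits here
    return Success;
  }
};
} // end anonymous namespace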
unsigned opcode = getOPCODE(insn); if (opcode == UNSUPPORTED) - return false; + return Fail; instr.setOpcode(opcode); @@ -529,11 +530,11 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, uint64_t tsFlags = MBlazeInsts[opcode].TSFlags; switch ((tsFlags & MBlazeII::FormMask)) { default: - return false; + return Fail; case MBlazeII::FRRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RB)); instr.addOperand(MCOperand::CreateReg(RA)); @@ -541,7 +542,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateReg(RB)); @@ -550,23 +551,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRI: switch (opcode) { default: - return false; + return Fail; case MBlaze::MFS: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); break; case MBlaze::MTS: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlaze::MSRSET: case MBlaze::MSRCLR: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x7FFF)); break; @@ -575,7 +576,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRI: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); switch (opcode) { @@ -592,35 +593,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FCRR: if (RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCRI: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FRCR: if (RD == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRCI: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FCCR: if (RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RB)); break; @@ -630,7 +631,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRCI: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getSHT(insn))); @@ -638,35 +639,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRC: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRCX: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); 
instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; case MBlazeII::FRCS: if (RD == UNSUPPORTED || RS == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RS)); break; case MBlazeII::FCRCS: if (RS == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RS)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FCRCX: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; @@ -677,13 +678,13 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FCR: if (RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRIR: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); instr.addOperand(MCOperand::CreateReg(RA)); @@ -693,11 +694,12 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // We always consume 4 bytes of data on success size = 4; - return true; + return Success; } -static MCDisassembler *createMBlazeDisassembler(const Target &T) { - return new MBlazeDisassembler; +static MCDisassembler *createMBlazeDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new MBlazeDisassembler(STI); } extern "C" void LLVMInitializeMBlazeDisassembler() { diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h index d05eced..0ac0d89 100644 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h +++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h @@ -32,19 +32,20 @@ class MBlazeDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - MBlazeDisassembler() : - MCDisassembler() { + MBlazeDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { } ~MBlazeDisassembler() { } /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, + MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size, const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream) const; + raw_ostream &vStream, + raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. EDInstInfo *getEDInfo() const; diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile deleted file mode 100644 index 0530b32..0000000 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeDisassembler - -# Hack: we need to include 'main' MBlaze target directory to grab headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt deleted file mode 100644 index 242a573..0000000 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMBlazeAsmPrinter - MBlazeInstPrinter.cpp - ) - -add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp index a7fd287..a1f1dbc 100644 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp @@ -25,8 +25,10 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #include "MBlazeGenAsmWriter.inc" -void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index eacca41..570ab08 100644 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -24,7 +24,7 @@ namespace llvm { MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile deleted file mode 100644 index 9fb6e86..0000000 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmPrinter - -# Hack: we need to include 'main' MBlaze target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h index 3390794..1399b85 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlaze.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h @@ -15,6 +15,7 @@ #ifndef TARGET_MBLAZE_H #define TARGET_MBLAZE_H +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "MCTargetDesc/MBlazeMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" @@ -22,17 +23,6 @@ namespace llvm { class MBlazeTargetMachine; class FunctionPass; class MachineCodeEmitter; - class MCCodeEmitter; - class MCInstrInfo; - class MCSubtargetInfo; - class TargetAsmBackend; - class formatted_raw_ostream; - - MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - - TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &); FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0016df5..97bd083 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -38,10 +38,10 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <cctype> @@ -136,19 +136,17 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() { const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg); + unsigned RegNum = getMBlazeRegisterNumbering(Reg); if (MBlaze::GPRRegisterClass->contains(Reg)) CPUBitmask |= (1 << RegNum); } // Return Address and Frame registers must also be set in CPUBitmask. if (TFI->hasFP(*MF)) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(*MF))); + CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF))); if (MFI->adjustsStack()) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getRARegister())); + CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister())); // Print CPUBitmask OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask)); @@ -318,18 +316,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return I == Pred->end() || !I->getDesc().isBarrier(); } -static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI); - return 0; -} - // Force static initialization. 
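The printSavedRegsBitmask hunk above is a pure rename: the static MBlazeRegisterInfo helpers give way to the free getMBlazeRegisterNumbering function, while the logic of setting one bit per saved general-purpose register (plus the frame and return-address registers) is unchanged. A self-contained sketch of that bitmask pattern, with a stand-in numbering function (the register ids here are invented for illustration):

#include <cstdint>
#include <vector>

// Hedged sketch: build a ".mask"-style word with one bit per saved register.
// getNumbering() stands in for getMBlazeRegisterNumbering(); real code maps
// register enum values to hardware numbers instead of using them directly.
static unsigned getNumbering(unsigned RegEnum) {
  return RegEnum; // illustrative identity mapping
}

static uint32_t buildSavedRegsBitmask(const std::vector<unsigned> &SavedRegs,
                                      bool HasFP, unsigned FPReg,
                                      unsigned RAReg, bool AdjustsStack) {
  uint32_t CPUBitmask = 0;
  for (unsigned i = 0, e = SavedRegs.size(); i != e; ++i)
    CPUBitmask |= 1u << getNumbering(SavedRegs[i]);
  if (HasFP)                                // the frame register must be marked too
    CPUBitmask |= 1u << getNumbering(FPReg);
  if (AdjustsStack)                         // as must the return-address register
    CPUBitmask |= 1u << getNumbering(RAReg);
  return CPUBitmask;
}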
extern "C" void LLVMInitializeMBlazeAsmPrinter() { RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); - TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget, - createMBlazeMCInstPrinter); - } diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp index e763902..f28d5a7 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====// +//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp index 62dfdcc..8ec548f 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -59,6 +59,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // MBlaze does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // Set up the register classes addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass); @@ -187,7 +188,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) computeRegisterProperties(); } -MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const { +EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -964,13 +965,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // The last register argument that must be saved is MBlaze::R10 TargetRegisterClass *RC = MBlaze::GPRRegisterClass; - unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5); - unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1); - unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10); + unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5); + unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1); + unsigned End = getMBlazeRegisterNumbering(MBlaze::R10); unsigned StackLoc = Start - Begin + 1; for (; Start <= End; ++Start, ++StackLoc) { - unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); + unsigned Reg = getMBlazeRegisterFromNumbering(Start); unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); @@ -1096,7 +1097,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. 
switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h index bb128da..8b49bc3 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h @@ -102,7 +102,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - get the ISD::SETCC result ValueType - MVT::SimpleValueType getSetCCResultType(EVT VT) const; + EVT getSetCCResultType(EVT VT) const; private: // Subtarget Info diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 188f10a..7ae05b3 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR @@ -239,7 +239,8 @@ unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { +bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> + &Cond) const { assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!"); switch (Cond[0].getImm()) { default: return true; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h index 79f962b..7174405 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -166,62 +166,6 @@ namespace MBlaze { } } -/// MBlazeII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace MBlazeII { - enum { - // PseudoFrm - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - FPseudo = 0, - FRRR, - FRRI, - FCRR, - FCRI, - FRCR, - FRCI, - FCCR, - FCCI, - FRRCI, - FRRC, - FRCX, - FRCS, - FCRCS, - FCRCX, - FCX, - FCR, - FRIR, - FRRRR, - FRI, - FC, - FormMask = 63 - - //===------------------------------------------------------------------===// - // MBlaze Specific MachineOperand flags. - // MO_NO_FLAG, - - /// MO_GOT - Represents the offset into the global offset table at which - /// the address the relocation entry symbol resides during execution. - // MO_GOT, - - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides - /// during execution. This is different from the above since this flag - /// can only be present in call instructions. - // MO_GOT_CALL, - - /// MO_GPREL - Represents the offset from the current gp value to be used - /// for the relocatable object file being produced. - // MO_GPREL, - - /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol - /// address. 
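The MBlazeII block removed from MBlazeInstrInfo.h is not dropped: it reappears later in this patch in MCTargetDesc/MBlazeBaseInfo.h so that MC-layer code, such as the disassembler's switch on tsFlags & MBlazeII::FormMask, can use it without pulling in CodeGen headers. A small sketch of that TSFlags idiom with an invented layout (the field width and form names are illustrative, not the real MBlaze encoding):

#include <cstdint>

// Hedged sketch of the TSFlags idiom: pack a small "instruction form" enum into
// the low bits of a per-instruction flags word and mask it back out when needed.
namespace ToyII {
  enum {
    FPseudo = 0, FRRR = 1, FRRI = 2, // instruction forms (illustrative subset)
    FormMask = 63                    // the low six bits hold the form
  };
}

static unsigned getInstrForm(uint64_t TSFlags) {
  return unsigned(TSFlags & ToyII::FormMask); // same pattern as the disassembler switch
}

static bool isThreeRegForm(uint64_t TSFlags) {
  return getInstrForm(TSFlags) == ToyII::FRRR;
}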
- // MO_ABS_HILO - - }; -} - class MBlazeInstrInfo : public MBlazeGenInstrInfo { MBlazeTargetMachine &TM; const MBlazeRegisterInfo RI; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td index 950f2d7..1d8c987 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -442,17 +442,19 @@ let Predicates=[HasMul] in { //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, isReMaterializable = 1 in { - def LBU : LoadM<0x30, 0x000, "lbu ">; - def LBUR : LoadM<0x30, 0x200, "lbur ">; + let neverHasSideEffects = 1 in { + def LBU : LoadM<0x30, 0x000, "lbu ">; + def LBUR : LoadM<0x30, 0x200, "lbur ">; - def LHU : LoadM<0x31, 0x000, "lhu ">; - def LHUR : LoadM<0x31, 0x200, "lhur ">; + def LHU : LoadM<0x31, 0x000, "lhu ">; + def LHUR : LoadM<0x31, 0x200, "lhur ">; - def LW : LoadM<0x32, 0x000, "lw ">; - def LWR : LoadM<0x32, 0x200, "lwr ">; + def LW : LoadM<0x32, 0x000, "lw ">; + def LWR : LoadM<0x32, 0x200, "lwr ">; - let Defs = [CARRY] in { - def LWX : LoadM<0x32, 0x400, "lwx ">; + let Defs = [CARRY] in { + def LWX : LoadM<0x32, 0x400, "lwx ">; + } } def LBUI : LoadMI<0x38, "lbui ", zextloadi8>; @@ -877,6 +879,9 @@ def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>; // Peepholes def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>; +// Atomic fence +def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 32d67b2..ea81dd6 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -37,7 +37,7 @@ namespace mblazeIntrinsic { #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN } -std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, +std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys, unsigned numTys) const { static const char *const names[] = { #define GET_INTRINSIC_NAME_TABLE @@ -90,8 +90,8 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES -static const FunctionType *getType(LLVMContext &Context, unsigned id) { - const Type *ResultTy = NULL; +static FunctionType *getType(LLVMContext &Context, unsigned id) { + Type *ResultTy = NULL; std::vector<Type*> ArgTys; bool IsVarArg = false; @@ -103,7 +103,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) { } Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - const Type **Tys, + Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h index 9804c77..80760d8 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -19,12 +19,12 @@ namespace llvm { class MBlazeIntrinsicInfo : public TargetIntrinsicInfo { public: - std::string getName(unsigned IntrID, const Type **Tys = 0, + std::string getName(unsigned IntrID, Type **Tys 
= 0, unsigned numTys = 0) const; unsigned lookupName(const char *Name, unsigned Len) const; unsigned lookupGCCName(const char *Name) const; bool isOverloaded(unsigned IID) const; - Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0, unsigned numTys = 0) const; }; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index f0b201a..9788ba9 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -44,164 +43,7 @@ using namespace llvm; MBlazeRegisterInfo:: MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) - : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {} - -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// MBlaze::R0, return the number that it corresponds to (e.g. 0). -unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { - switch (RegEnum) { - case MBlaze::R0 : return 0; - case MBlaze::R1 : return 1; - case MBlaze::R2 : return 2; - case MBlaze::R3 : return 3; - case MBlaze::R4 : return 4; - case MBlaze::R5 : return 5; - case MBlaze::R6 : return 6; - case MBlaze::R7 : return 7; - case MBlaze::R8 : return 8; - case MBlaze::R9 : return 9; - case MBlaze::R10 : return 10; - case MBlaze::R11 : return 11; - case MBlaze::R12 : return 12; - case MBlaze::R13 : return 13; - case MBlaze::R14 : return 14; - case MBlaze::R15 : return 15; - case MBlaze::R16 : return 16; - case MBlaze::R17 : return 17; - case MBlaze::R18 : return 18; - case MBlaze::R19 : return 19; - case MBlaze::R20 : return 20; - case MBlaze::R21 : return 21; - case MBlaze::R22 : return 22; - case MBlaze::R23 : return 23; - case MBlaze::R24 : return 24; - case MBlaze::R25 : return 25; - case MBlaze::R26 : return 26; - case MBlaze::R27 : return 27; - case MBlaze::R28 : return 28; - case MBlaze::R29 : return 29; - case MBlaze::R30 : return 30; - case MBlaze::R31 : return 31; - case MBlaze::RPC : return 0x0000; - case MBlaze::RMSR : return 0x0001; - case MBlaze::REAR : return 0x0003; - case MBlaze::RESR : return 0x0005; - case MBlaze::RFSR : return 0x0007; - case MBlaze::RBTR : return 0x000B; - case MBlaze::REDR : return 0x000D; - case MBlaze::RPID : return 0x1000; - case MBlaze::RZPR : return 0x1001; - case MBlaze::RTLBX : return 0x1002; - case MBlaze::RTLBLO : return 0x1003; - case MBlaze::RTLBHI : return 0x1004; - case MBlaze::RPVR0 : return 0x2000; - case MBlaze::RPVR1 : return 0x2001; - case MBlaze::RPVR2 : return 0x2002; - case MBlaze::RPVR3 : return 0x2003; - case MBlaze::RPVR4 : return 0x2004; - case MBlaze::RPVR5 : return 0x2005; - case MBlaze::RPVR6 : return 0x2006; - case MBlaze::RPVR7 : return 0x2007; - case MBlaze::RPVR8 : return 0x2008; - case MBlaze::RPVR9 : return 0x2009; - case MBlaze::RPVR10 : return 0x200A; - case MBlaze::RPVR11 : return 0x200B; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -/// getRegisterFromNumbering - Given the enum value for some register, e.g. -/// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
-unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { - switch (Reg) { - case 0 : return MBlaze::R0; - case 1 : return MBlaze::R1; - case 2 : return MBlaze::R2; - case 3 : return MBlaze::R3; - case 4 : return MBlaze::R4; - case 5 : return MBlaze::R5; - case 6 : return MBlaze::R6; - case 7 : return MBlaze::R7; - case 8 : return MBlaze::R8; - case 9 : return MBlaze::R9; - case 10 : return MBlaze::R10; - case 11 : return MBlaze::R11; - case 12 : return MBlaze::R12; - case 13 : return MBlaze::R13; - case 14 : return MBlaze::R14; - case 15 : return MBlaze::R15; - case 16 : return MBlaze::R16; - case 17 : return MBlaze::R17; - case 18 : return MBlaze::R18; - case 19 : return MBlaze::R19; - case 20 : return MBlaze::R20; - case 21 : return MBlaze::R21; - case 22 : return MBlaze::R22; - case 23 : return MBlaze::R23; - case 24 : return MBlaze::R24; - case 25 : return MBlaze::R25; - case 26 : return MBlaze::R26; - case 27 : return MBlaze::R27; - case 28 : return MBlaze::R28; - case 29 : return MBlaze::R29; - case 30 : return MBlaze::R30; - case 31 : return MBlaze::R31; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) { - switch (Reg) { - case 0x0000 : return MBlaze::RPC; - case 0x0001 : return MBlaze::RMSR; - case 0x0003 : return MBlaze::REAR; - case 0x0005 : return MBlaze::RESR; - case 0x0007 : return MBlaze::RFSR; - case 0x000B : return MBlaze::RBTR; - case 0x000D : return MBlaze::REDR; - case 0x1000 : return MBlaze::RPID; - case 0x1001 : return MBlaze::RZPR; - case 0x1002 : return MBlaze::RTLBX; - case 0x1003 : return MBlaze::RTLBLO; - case 0x1004 : return MBlaze::RTLBHI; - case 0x2000 : return MBlaze::RPVR0; - case 0x2001 : return MBlaze::RPVR1; - case 0x2002 : return MBlaze::RPVR2; - case 0x2003 : return MBlaze::RPVR3; - case 0x2004 : return MBlaze::RPVR4; - case 0x2005 : return MBlaze::RPVR5; - case 0x2006 : return MBlaze::RPVR6; - case 0x2007 : return MBlaze::RPVR7; - case 0x2008 : return MBlaze::RPVR8; - case 0x2009 : return MBlaze::RPVR9; - case 0x200A : return MBlaze::RPVR10; - case 0x200B : return MBlaze::RPVR11; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -bool MBlazeRegisterInfo::isRegister(unsigned Reg) { - return Reg <= 31; -} - -bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) { - switch (Reg) { - case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : - case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : - case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : - case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : - case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : - case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : - return true; - - default: - return false; - } - return false; // Not reached -} + : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {} unsigned MBlazeRegisterInfo::getPICCallReg() { return MBlaze::R20; @@ -334,10 +176,6 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF) const { MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset()); } -unsigned MBlazeRegisterInfo::getRARegister() const { - return MBlaze::R15; -} - unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -353,11 +191,3 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int 
MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { - return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0); -} - -int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h index 7ebce21..7e4b269 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -42,14 +42,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, const TargetInstrInfo &tii); - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// MBlaze::RA, return the number that it corresponds to (e.g. 31). - static unsigned getRegisterNumbering(unsigned RegEnum); - static unsigned getRegisterFromNumbering(unsigned RegEnum); - static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum); - static bool isRegister(unsigned RegEnum); - static bool isSpecialRegister(unsigned RegEnum); - /// Get PIC indirect call register static unsigned getPICCallReg(); @@ -69,15 +61,11 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; /// Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp index eda141d..7e5667f 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -15,7 +15,7 @@ #include "MBlaze.h" #include "MBlazeRegisterInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 7208874..7bff53e 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -16,48 +16,13 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) { - llvm_unreachable("MBlaze does not support Darwin MACH-O format"); - return NULL; - } - - if (TheTriple.isOSWindows()) { - llvm_unreachable("MBlaze does not support Windows COFF format"); - return NULL; - } - - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); -} - - extern "C" void LLVMInitializeMBlazeTarget() { // Register the target. 
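The register-info hunks above delete getRARegister, getDwarfRegNum and getLLVMRegNum from MBlazeRegisterInfo because the constructor now hands the return-address register (MBlaze::R15) to the TableGen-generated MBlazeGenRegisterInfo base, which serves those queries itself. A toy sketch of that design change, with invented class names standing in for the generated code:

// Hedged sketch: the return-address register is given to the generated base
// class once, instead of every target overriding a virtual getRARegister().
class ToyGenRegisterInfo {            // stands in for the TableGen'd base
  unsigned RAReg;
public:
  explicit ToyGenRegisterInfo(unsigned RA) : RAReg(RA) {}
  unsigned getRARegister() const { return RAReg; } // query served by the base
};

class ToyRegisterInfo : public ToyGenRegisterInfo {
public:
  ToyRegisterInfo() : ToyGenRegisterInfo(/*R15*/ 15) {} // mirrors MBlaze::R15
  // no getRARegister() override is needed any more
};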
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); - - // Register the MC code emitter - TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget, - llvm::createMBlazeMCCodeEmitter); - - // Register the asm backend - TargetRegistry::RegisterAsmBackend(TheMBlazeTarget, - createMBlazeAsmBackend); - - // Register the object streamer - TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget, - createMCStreamer); - } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -67,21 +32,16 @@ extern "C" void LLVMInitializeMBlazeTarget() { // offset from the stack/frame pointer, using StackGrowsUp enables // an easier handling. MBlazeTargetMachine:: -MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS): - LLVMTargetMachine(T, TT, CPU, FS), +MBlazeTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM): + LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - if (getRelocationModel() == Reloc::Default) { - setRelocationModel(Reloc::Static); - } - - if (getCodeModel() == CodeModel::Default) - setCodeModel(CodeModel::Small); } // Install an instruction selector pass using diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h index cd6caaf..c1bc08a 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -41,8 +41,9 @@ namespace llvm { InstrItineraryData InstrItins; public: - MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MBlazeTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index abd1b0b..f66ea30 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -69,7 +69,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isMergeable1ByteCString()) return false; - const Type *Ty = GV->getType()->getElementType(); + Type *Ty = GV->getType()->getElementType(); return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); } diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 3d15708..0000000 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMBlazeDesc - MBlazeMCTargetDesc.cpp - MBlazeMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index 08f14c3..08f7d46a 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -7,10 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "MBlaze.h" -#include "MBlazeELFWriterInfo.h" -#include "llvm/ADT/Twine.h" +#include 
"MCTargetDesc/MBlazeMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -20,11 +18,11 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; static unsigned getFixupKindSize(unsigned Kind) { @@ -48,10 +46,10 @@ public: /*HasRelocationAddend*/ true) {} }; -class MBlazeAsmBackend : public TargetAsmBackend { +class MBlazeAsmBackend : public MCAsmBackend { public: MBlazeAsmBackend(const Target &T) - : TargetAsmBackend() { + : MCAsmBackend() { } unsigned getNumFixupKinds() const { @@ -148,8 +146,7 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, } } // end anonymous namespace -TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h new file mode 100644 index 0000000..776dbc4 --- /dev/null +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h @@ -0,0 +1,240 @@ +//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze -- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the MBlaze target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef MBlazeBASEINFO_H +#define MBlazeBASEINFO_H + +#include "MBlazeMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// MBlazeII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MBlazeII { + enum { + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + FPseudo = 0, + FRRR, + FRRI, + FCRR, + FCRI, + FRCR, + FRCI, + FCCR, + FCCI, + FRRCI, + FRRC, + FRCX, + FRCS, + FCRCS, + FCRCX, + FCX, + FCR, + FRIR, + FRRRR, + FRI, + FC, + FormMask = 63 + + //===------------------------------------------------------------------===// + // MBlaze Specific MachineOperand flags. + // MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + // MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. 
+ // MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + // MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + // MO_ABS_HILO + + }; +} + +static inline bool isMBlazeRegister(unsigned Reg) { + return Reg <= 31; +} + +static inline bool isSpecialMBlazeRegister(unsigned Reg) { + switch (Reg) { + case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : + case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : + case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : + case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : + case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : + case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : + return true; + + default: + return false; + } + return false; // Not reached +} + +/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) { + switch (RegEnum) { + case MBlaze::R0 : return 0; + case MBlaze::R1 : return 1; + case MBlaze::R2 : return 2; + case MBlaze::R3 : return 3; + case MBlaze::R4 : return 4; + case MBlaze::R5 : return 5; + case MBlaze::R6 : return 6; + case MBlaze::R7 : return 7; + case MBlaze::R8 : return 8; + case MBlaze::R9 : return 9; + case MBlaze::R10 : return 10; + case MBlaze::R11 : return 11; + case MBlaze::R12 : return 12; + case MBlaze::R13 : return 13; + case MBlaze::R14 : return 14; + case MBlaze::R15 : return 15; + case MBlaze::R16 : return 16; + case MBlaze::R17 : return 17; + case MBlaze::R18 : return 18; + case MBlaze::R19 : return 19; + case MBlaze::R20 : return 20; + case MBlaze::R21 : return 21; + case MBlaze::R22 : return 22; + case MBlaze::R23 : return 23; + case MBlaze::R24 : return 24; + case MBlaze::R25 : return 25; + case MBlaze::R26 : return 26; + case MBlaze::R27 : return 27; + case MBlaze::R28 : return 28; + case MBlaze::R29 : return 29; + case MBlaze::R30 : return 30; + case MBlaze::R31 : return 31; + case MBlaze::RPC : return 0x0000; + case MBlaze::RMSR : return 0x0001; + case MBlaze::REAR : return 0x0003; + case MBlaze::RESR : return 0x0005; + case MBlaze::RFSR : return 0x0007; + case MBlaze::RBTR : return 0x000B; + case MBlaze::REDR : return 0x000D; + case MBlaze::RPID : return 0x1000; + case MBlaze::RZPR : return 0x1001; + case MBlaze::RTLBX : return 0x1002; + case MBlaze::RTLBLO : return 0x1003; + case MBlaze::RTLBHI : return 0x1004; + case MBlaze::RPVR0 : return 0x2000; + case MBlaze::RPVR1 : return 0x2001; + case MBlaze::RPVR2 : return 0x2002; + case MBlaze::RPVR3 : return 0x2003; + case MBlaze::RPVR4 : return 0x2004; + case MBlaze::RPVR5 : return 0x2005; + case MBlaze::RPVR6 : return 0x2006; + case MBlaze::RPVR7 : return 0x2007; + case MBlaze::RPVR8 : return 0x2008; + case MBlaze::RPVR9 : return 0x2009; + case MBlaze::RPVR10 : return 0x200A; + case MBlaze::RPVR11 : return 0x200B; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +/// getRegisterFromNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0 : return MBlaze::R0; + case 1 : return MBlaze::R1; + case 2 : return MBlaze::R2; + case 3 : return MBlaze::R3; + case 4 : return MBlaze::R4; + case 5 : return MBlaze::R5; + case 6 : return MBlaze::R6; + case 7 : return MBlaze::R7; + case 8 : return MBlaze::R8; + case 9 : return MBlaze::R9; + case 10 : return MBlaze::R10; + case 11 : return MBlaze::R11; + case 12 : return MBlaze::R12; + case 13 : return MBlaze::R13; + case 14 : return MBlaze::R14; + case 15 : return MBlaze::R15; + case 16 : return MBlaze::R16; + case 17 : return MBlaze::R17; + case 18 : return MBlaze::R18; + case 19 : return MBlaze::R19; + case 20 : return MBlaze::R20; + case 21 : return MBlaze::R21; + case 22 : return MBlaze::R22; + case 23 : return MBlaze::R23; + case 24 : return MBlaze::R24; + case 25 : return MBlaze::R25; + case 26 : return MBlaze::R26; + case 27 : return MBlaze::R27; + case 28 : return MBlaze::R28; + case 29 : return MBlaze::R29; + case 30 : return MBlaze::R30; + case 31 : return MBlaze::R31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0x0000 : return MBlaze::RPC; + case 0x0001 : return MBlaze::RMSR; + case 0x0003 : return MBlaze::REAR; + case 0x0005 : return MBlaze::RESR; + case 0x0007 : return MBlaze::RFSR; + case 0x000B : return MBlaze::RBTR; + case 0x000D : return MBlaze::REDR; + case 0x1000 : return MBlaze::RPID; + case 0x1001 : return MBlaze::RZPR; + case 0x1002 : return MBlaze::RTLBX; + case 0x1003 : return MBlaze::RTLBLO; + case 0x1004 : return MBlaze::RTLBHI; + case 0x2000 : return MBlaze::RPVR0; + case 0x2001 : return MBlaze::RPVR1; + case 0x2002 : return MBlaze::RPVR2; + case 0x2003 : return MBlaze::RPVR3; + case 0x2004 : return MBlaze::RPVR4; + case 0x2005 : return MBlaze::RPVR5; + case 0x2006 : return MBlaze::RPVR6; + case 0x2007 : return MBlaze::RPVR7; + case 0x2008 : return MBlaze::RPVR8; + case 0x2009 : return MBlaze::RPVR9; + case 0x200A : return MBlaze::RPVR10; + case 0x200B : return MBlaze::RPVR11; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp index ddc636d..1514557 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "MBlaze.h" -#include "MBlazeInstrInfo.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" +#include "MCTargetDesc/MBlazeMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCFixup.h" #include "llvm/ADT/Statistic.h" @@ -106,7 +108,7 @@ MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO) const { if (MO.isReg()) - return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg()); + return getMBlazeRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isExpr()) diff 
--git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 20d6c0b..43ae281 100644 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "MBlazeMCTargetDesc.h" #include "MBlazeMCAsmInfo.h" +#include "InstPrinter/MBlazeInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MBlazeGenInstrInfo.inc" @@ -36,8 +40,10 @@ static MCInstrInfo *createMBlazeMCInstrInfo() { return X; } -extern "C" void LLVMInitializeMBlazeMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo); +static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMBlazeMCRegisterInfo(X, MBlaze::R15); + return X; } static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,11 +53,6 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget, - createMBlazeMCSubtargetInfo); -} - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -60,6 +61,80 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } -extern "C" void LLVMInitializeMBlazeMCAsmInfo() { +static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + if (CM == CodeModel::Default) + CM = CodeModel::Small; + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) { + llvm_unreachable("MBlaze does not support Darwin MACH-O format"); + return NULL; + } + + if (TheTriple.isOSWindows()) { + llvm_unreachable("MBlaze does not support Windows COFF format"); + return NULL; + } + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new MBlazeInstPrinter(MAI); + return 0; +} + +// Force static initialization. +extern "C" void LLVMInitializeMBlazeTargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget, + createMBlazeMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget, + createMBlazeMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget, + createMBlazeMCSubtargetInfo); + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget, + llvm::createMBlazeMCCodeEmitter); + + // Register the asm backend + TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget, + createMBlazeAsmBackend); + + // Register the object streamer + TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget, + createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget, + createMBlazeMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index b14772e..deff5cb 100644 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -15,12 +15,23 @@ #define MBLAZEMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCContext; +class MCCodeEmitter; +class MCInstrInfo; class MCSubtargetInfo; class Target; class StringRef; +class formatted_raw_ostream; extern Target TheMBlazeTarget; +MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for MBlaze registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile deleted file mode 100644 index 71075ff..0000000 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp index 16e01db..71210d8 100644 --- a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -9,7 +9,7 @@ #include "MBlaze.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMBlazeTarget; diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt deleted file mode 100644 index f5458d5..0000000 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMSP430AsmPrinter - MSP430InstPrinter.cpp - ) -add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index e10d4fe..5d6c6ad 100644 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -25,8 +25,10 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #include "MSP430GenAsmWriter.inc" -void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 50d98b7..a1984a8 100644 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -22,9 +22,9 @@ namespace llvm { class MSP430InstPrinter : public MCInstPrinter { public: MSP430InstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + : MCInstPrinter(MAI) {} - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile deleted file mode 100644 index a5293ab..0000000 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430AsmPrinter - -# Hack: we need to include 'main' MSP430 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 0f3ebd3..0000000 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMSP430Desc - MSP430MCTargetDesc.cpp - MSP430MCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 43a704d..fda70b8 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "MSP430MCTargetDesc.h" #include "MSP430MCAsmInfo.h" +#include "InstPrinter/MSP430InstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MSP430GenInstrInfo.inc" @@ -29,18 +31,18 @@ using namespace llvm; - static MCInstrInfo *createMSP430MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitMSP430MCInstrInfo(X); return X; } -extern "C" void LLVMInitializeMSP430MCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo); +static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMSP430MCRegisterInfo(X, MSP430::PCW); + return X; } - static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -48,11 +50,42 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMSP430MCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target, - createMSP430MCSubtargetInfo); +static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new MSP430InstPrinter(MAI); + return 0; } -extern "C" void LLVMInitializeMSP430MCAsmInfo() { +extern "C" void LLVMInitializeMSP430TargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target, + createMSP430MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMSP430Target, + createMSP430MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target, + createMSP430MCSubtargetInfo); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, + createMSP430MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 0d8a6bd..35f2590 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHAMCTARGETDESC_H -#define ALPHAMCTARGETDESC_H +#ifndef MSP430MCTARGETDESC_H +#define MSP430MCTARGETDESC_H namespace llvm { class MCSubtargetInfo; diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile deleted file mode 100644 index bb85799..0000000 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp index 2042056..8836549 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -32,9 +32,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -163,17 +161,7 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); } -static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI); - return 0; -} - // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target); - TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, - createMSP430MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index 0a3eab1..dc37431 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -79,6 +79,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setSchedulingPreference(Sched::Latency); // We have post-incremented loads / stores. 
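The hunk above pairs the existing setBooleanContents() call with the newly introduced setBooleanVectorContents() hook: the first tells the SelectionDAG how scalar i1 results of setcc/select are materialized, the second does the same for vector compares. A minimal sketch of the pattern, assuming a hypothetical MyTarget backend rather than anything taken from this commit:

#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

// Sketch only: MyTargetLowering / MyTargetMachine are placeholder names.
// MSP430 declares both scalar and vector booleans as 0-or-1 values, so a
// comparison result is always materialized as 0 or 1 in a register.
MyTargetLowering::MyTargetLowering(MyTargetMachine &TM)
    : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  setBooleanContents(ZeroOrOneBooleanContent);        // scalar setcc results
  setBooleanVectorContents(ZeroOrOneBooleanContent);  // vector setcc results
}

The FIXME carried in the hunk records that the right vector setting for MSP430 had not been verified when the hook was added.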
@@ -987,8 +988,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { } } -bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, - const Type *Ty2) const { +bool MSP430TargetLowering::isTruncateFree(Type *Ty1, + Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; @@ -1002,7 +1003,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return (VT1.getSizeInBits() > VT2.getSizeInBits()); } -bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { +bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h index bd660a0..237f604 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,7 +102,7 @@ namespace llvm { /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. - virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a value @@ -113,7 +113,7 @@ namespace llvm { /// necessarily apply to truncate instructions. e.g. on msp430, all /// instructions that define 8-bit values implicit zero-extend the result /// out to 16 bits. - virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 846d093..ffd4318 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "MSP430GenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp index 1cc60bb..9049c4b 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -34,7 +34,7 @@ using namespace llvm; // FIXME: Provide proper call frame setup / destroy opcodes. MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii) - : MSP430GenRegisterInfo(), TM(tm), TII(tii) { + : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) { StackAlign = TM.getFrameLowering()->getStackAlignment(); } @@ -233,22 +233,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } -unsigned MSP430RegisterInfo::getRARegister() const { - return MSP430::PCW; -} - unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ? 
MSP430::FPW : MSP430::SPW; } - -int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("Not implemented yet!"); - return 0; -} - -int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("Not implemented yet!"); - return 0; -} diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h index fb70594..10a3d53 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h @@ -58,12 +58,7 @@ public: void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp index b58c50a..3ee14d9 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -13,7 +13,7 @@ #include "MSP430Subtarget.h" #include "MSP430.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 971f512..4dd8933 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -16,7 +16,7 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeMSP430Target() { @@ -25,10 +25,11 @@ extern "C" void LLVMInitializeMSP430Target() { } MSP430TargetMachine::MSP430TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, + StringRef CPU, + StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. 
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h index 2a9eea0..eb483dc 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -38,8 +38,9 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430FrameLowering FrameLowering; public: - MSP430TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MSP430TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp index f9ca5c4..8b3e01e 100644 --- a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp @@ -9,7 +9,7 @@ #include "MSP430.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMSP430Target; diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp index 46c687b..53ad155f 100644 --- a/contrib/llvm/lib/Target/Mangler.cpp +++ b/contrib/llvm/lib/Target/Mangler.cpp @@ -159,7 +159,7 @@ static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName, unsigned ArgWords = 0; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { - const Type *Ty = AI->getType(); + Type *Ty = AI->getType(); // 'Dereference' type in case of byval parameter attribute if (AI->hasByValAttr()) Ty = cast<PointerType>(Ty)->getElementType(); @@ -214,7 +214,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, // fastcall and stdcall functions usually need @42 at the end to specify // the argument info. - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) && // "Pure" variadic functions do not receive @0 suffix. (!FT->isVarArg() || FT->getNumParams() == 0 || diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt deleted file mode 100644 index 8852fd4..0000000 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMipsAsmPrinter - MipsInstPrinter.cpp - ) -add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile deleted file mode 100644 index 63e38ef..0000000 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 41c1dd3..3dafc61 100644 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -1,4 +1,4 @@ -//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===// +//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===// // // The LLVM Compiler Infrastructure // @@ -69,8 +69,10 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << '$' << LowercaseString(getRegisterName(RegNo)); } -void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 680208e..5c11165 100644 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===// +//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===// // // The LLVM Compiler Infrastructure // @@ -86,7 +86,7 @@ public: virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 97de75d..0000000 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMipsDesc - MipsMCTargetDesc.cpp - MipsMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile deleted file mode 100644 index 7fe2086..0000000 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Mips/TargetDesc/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp new file mode 100644 index 0000000..f190ec4 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -0,0 +1,117 @@ +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class MipsELFObjectWriter : public MCELFObjectTargetWriter { +public: + MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, + HasRelocationAddend) {} +}; + +class MipsAsmBackend : public MCAsmBackend { +public: + MipsAsmBackend(const Target &T) + : MCAsmBackend() {} + + unsigned getNumFixupKinds() const { + return 1; //tbd + } + + /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided + /// data fragment, at the offset specified by the fixup and following the + /// fixup kind as appropriate. + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + } + + /// @name Target Relaxation Interfaces + /// @{ + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. + bool MayNeedRelaxation(const MCInst &Inst) const { + return false; + } + + /// RelaxInstruction - Relax the instruction in the given fragment to the next + /// wider instruction. + /// + /// \param Inst - The instruction to relax, which may be the same as the + /// output. + /// \parm Res [output] - On return, the relaxed instruction. + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + } + + /// @} + + /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given + /// output. If the target cannot generate such a sequence, it should return an + /// error. + /// + /// \return - True on success. 
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + return false; + } +}; + +class MipsEB_AsmBackend : public MipsAsmBackend { +public: + Triple::OSType OSType; + + MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType) + : MipsAsmBackend(T), OSType(_OSType) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createELFObjectWriter(createELFObjectTargetWriter(), + OS, /*IsLittleEndian*/ false); + } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); + } +}; + +class MipsEL_AsmBackend : public MipsAsmBackend { +public: + Triple::OSType OSType; + + MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType) + : MipsAsmBackend(T), OSType(_OSType) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createELFObjectWriter(createELFObjectTargetWriter(), + OS, /*IsLittleEndian*/ true); + } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); + } +}; +} + +MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + // just return little endian for now + // + return new MipsEL_AsmBackend(T, Triple(TT).getOS()); +} diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h new file mode 100644 index 0000000..f7a6fa9 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -0,0 +1,113 @@ +//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Mips target useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// +#ifndef MIPSBASEINFO_H +#define MIPSBASEINFO_H + +#include "MipsMCTargetDesc.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +/// getMipsRegisterNumbering - Given the enum value for some register, +/// return the number that it corresponds to. 
+inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) +{ + switch (RegEnum) { + case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: + case Mips::D0: + return 0; + case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + return 1; + case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: + case Mips::D1: + return 2; + case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + return 3; + case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: + case Mips::D2: + return 4; + case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64: + return 5; + case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64: + case Mips::D3: + return 6; + case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64: + return 7; + case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64: + case Mips::D4: + return 8; + case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64: + return 9; + case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64: + case Mips::D5: + return 10; + case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64: + return 11; + case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64: + case Mips::D6: + return 12; + case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64: + return 13; + case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64: + case Mips::D7: + return 14; + case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64: + return 15; + case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64: + case Mips::D8: + return 16; + case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64: + return 17; + case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64: + case Mips::D9: + return 18; + case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64: + return 19; + case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64: + case Mips::D10: + return 20; + case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64: + return 21; + case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64: + case Mips::D11: + return 22; + case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64: + return 23; + case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64: + case Mips::D12: + return 24; + case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64: + return 25; + case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64: + case Mips::D13: + return 26; + case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64: + return 27; + case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64: + case Mips::D14: + return 28; + case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + return 29; + case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: + case Mips::D15: + return 30; + case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: + return 31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h new file mode 100644 index 0000000..8b099ea --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -0,0 +1,90 @@ +#ifndef LLVM_Mips_MipsFIXUPKINDS_H +#define LLVM_Mips_MipsFIXUPKINDS_H + +//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Mips { + enum Fixups { + // fixup_Mips_xxx - R_MIPS_NONE + fixup_Mips_NONE = FirstTargetFixupKind, + + // fixup_Mips_xxx - R_MIPS_16. + fixup_Mips_16, + + // fixup_Mips_xxx - R_MIPS_32. + fixup_Mips_32, + + // fixup_Mips_xxx - R_MIPS_REL32. + fixup_Mips_REL32, + + // fixup_Mips_xxx - R_MIPS_26. + fixup_Mips_26, + + // fixup_Mips_xxx - R_MIPS_HI16. + fixup_Mips_HI16, + + // fixup_Mips_xxx - R_MIPS_LO16. + fixup_Mips_LO16, + + // fixup_Mips_xxx - R_MIPS_GPREL16. + fixup_Mips_GPREL16, + + // fixup_Mips_xxx - R_MIPS_LITERAL. + fixup_Mips_LITERAL, + + // fixup_Mips_xxx - R_MIPS_GOT16. + fixup_Mips_GOT16, + + // fixup_Mips_xxx - R_MIPS_PC16. + fixup_Mips_PC16, + + // fixup_Mips_xxx - R_MIPS_CALL16. + fixup_Mips_CALL16, + + // fixup_Mips_xxx - R_MIPS_GPREL32. + fixup_Mips_GPREL32, + + // fixup_Mips_xxx - R_MIPS_SHIFT5. + fixup_Mips_SHIFT5, + + // fixup_Mips_xxx - R_MIPS_SHIFT6. + fixup_Mips_SHIFT6, + + // fixup_Mips_xxx - R_MIPS_64. + fixup_Mips_64, + + // fixup_Mips_xxx - R_MIPS_TLS_GD. + fixup_Mips_TLSGD, + + // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL. + fixup_Mips_GOTTPREL, + + // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16. + fixup_Mips_TPREL_HI, + + // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16. + fixup_Mips_TPREL_LO, + + // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16 + fixup_Mips_Branch_PCRel, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; +} // namespace llvm +} // namespace Mips + + +#endif /* LLVM_Mips_MipsFIXUPKINDS_H */ diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index 5d92425..71ae804 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -18,7 +18,8 @@ using namespace llvm; MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::mips) + if ((TheTriple.getArch() == Triple::mips) || + (TheTriple.getArch() == Triple::mips64)) IsLittleEndian = false; AlignmentIsInBytes = false; diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp new file mode 100644 index 0000000..d66de23 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -0,0 +1,52 @@ +//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsMCCodeEmitter class. 
+// +//===----------------------------------------------------------------------===// +// +#define DEBUG_TYPE "mccodeemitter" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" + +using namespace llvm; + +namespace { +class MipsMCCodeEmitter : public MCCodeEmitter { + MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; + +public: + MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti) {} + + ~MipsMCCodeEmitter() {} + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + } +}; // class MipsMCCodeEmitter +} // namespace + +MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new MipsMCCodeEmitter(MCII, STI, Ctx); +} diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 06f0d0b..1f9e3dd 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "MipsMCTargetDesc.h" #include "MipsMCAsmInfo.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MipsGenInstrInfo.inc" @@ -35,11 +39,12 @@ static MCInstrInfo *createMipsMCInstrInfo() { return X; } -extern "C" void LLVMInitializeMipsMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); +static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMipsMCRegisterInfo(X, Mips::RA); + return X; } - static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -47,12 +52,111 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMipsMCSubtargetInfo() { +static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(Mips::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::PIC_; + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createMipsMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + return new MipsInstPrinter(MAI); +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter 
*_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +extern "C" void LLVMInitializeMipsTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo); + RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo); + RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo); + RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget, + createMipsMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMips64elTarget, + createMipsMCRegisterInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, + createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMips64Target, + createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget, + createMipsMCCodeEmitter); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, createMipsMCSubtargetInfo); -} + TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget, + createMipsMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target, + createMipsMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget, + createMipsMCSubtargetInfo); -extern "C" void LLVMInitializeMipsMCAsmInfo() { - RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget); - RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMips64Target, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget, + createMipsMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 3d18f11..7a0042a 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,21 +7,32 @@ // //===----------------------------------------------------------------------===// // -// This file provides Alpha specific target descriptions. +// This file provides Mips specific target descriptions. // //===----------------------------------------------------------------------===// -#ifndef ALPHAMCTARGETDESC_H -#define ALPHAMCTARGETDESC_H +#ifndef MIPSMCTARGETDESC_H +#define MIPSMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCInstrInfo; +class MCCodeEmitter; +class MCContext; class MCSubtargetInfo; -class Target; class StringRef; +class Target; extern Target TheMipsTarget; extern Target TheMipselTarget; +extern Target TheMips64Target; +extern Target TheMips64elTarget; + +MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); +MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT); } // End llvm namespace // Defines symbolic names for Mips registers. 
This defines a mapping from diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h index 984b5ad..bacecf2 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.h +++ b/contrib/llvm/lib/Target/Mips/Mips.h @@ -29,6 +29,9 @@ namespace llvm { FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM); + FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE); + } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td index 433cd57..39c2c16 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.td +++ b/contrib/llvm/lib/Target/Mips/Mips.td @@ -38,6 +38,10 @@ def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", "true", "Only supports single precision float">; def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32", "Enable o32 ABI">; +def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32", + "Enable n32 ABI">; +def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", + "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", @@ -54,16 +58,19 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true", "Enable 'byte/half swap' instructions.">; def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true", "Enable 'count leading bits' instructions.">; -def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", - "Mips1 ISA Support">; -def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", - "Mips2 ISA Support">; def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", "Mips32 ISA Support", [FeatureCondMov, FeatureBitCount]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", [FeatureMips32, FeatureSEInReg]>; +def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion", + "Mips64", "Mips64 ISA Support", + [FeatureGP64Bit, FeatureFP64Bit, + FeatureMips32]>; +def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", + "Mips64r2", "Mips64r2 ISA Support", + [FeatureMips64, FeatureMips32r2]>; //===----------------------------------------------------------------------===// // Mips processors supported. @@ -72,21 +79,10 @@ def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, MipsGenericItineraries, Features>; -def : Proc<"mips1", [FeatureMips1]>; -def : Proc<"r2000", [FeatureMips1]>; -def : Proc<"r3000", [FeatureMips1]>; - -def : Proc<"mips2", [FeatureMips2]>; -def : Proc<"r6000", [FeatureMips2]>; - +def : Proc<"mips32r1", [FeatureMips32]>; def : Proc<"4ke", [FeatureMips32r2]>; - -// Allegrex is a 32bit subset of r4000, both for integer and fp registers, -// but much more similar to Mips2 than Mips3. It also contains some of -// Mips32/Mips32r2 instructions and a custom vector fpu processor. 
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, - FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd, - FeatureMinMax, FeatureSwap, FeatureBitCount]>; +def : Proc<"mips64r1", [FeatureMips64]>; +def : Proc<"mips64r2", [FeatureMips64r2]>; def MipsAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td new file mode 100644 index 0000000..49b0223 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -0,0 +1,214 @@ +//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips64 instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mips Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +// Instruction operand types +def shamt_64 : Operand<i64>; + +// Unsigned Operand +def uimm16_64 : Operand<i64> { + let PrintMethod = "printUnsignedImm"; +} + +// Transformation Function - get Imm - 32. +def Subtract32 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() - 32); +}]>; + +// imm32_63 predicate - True if imm is in range [32, 63]. +def imm32_63 : ImmLeaf<i64, + [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}], + Subtract32>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// +// Shifts +class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm, + SDNode OpNode, PatFrag PF>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$b, shamt_64:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, (i64 PF:$c)))], + IIAlu> { + let rs = _rs; +} + +class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm, + SDNode OpNode>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$c, CPU64Regs:$b), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, CPU64Regs:$c))], IIAlu> { + let shamt = _shamt; +} + +// Mul, Div +let Defs = [HI64, LO64] in { + let isCommutable = 1 in + class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b), + !strconcat(instr_asm, "\t$a, $b"), [], itin>; + + class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b), + !strconcat(instr_asm, "\t$$zero, $a, $b"), + [(op CPU64Regs:$a, CPU64Regs:$b)], itin>; +} + +// Move from Hi/Lo +let shamt = 0 in { +let rs = 0, rt = 0 in +class MoveFromLOHI64<bits<6> func, string instr_asm>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins), + !strconcat(instr_asm, "\t$dst"), [], IIHiLo>; + +let rt = 0, rd = 0 in +class MoveToLOHI64<bits<6> func, string instr_asm>: + FR<0x00, func, (outs), (ins CPU64Regs:$src), + !strconcat(instr_asm, "\t$src"), [], IIHiLo>; +} + +// Count Leading Ones/Zeros in Word +class CountLeading64<bits<6> func, string instr_asm, 
list<dag> pattern>: + FR<0x1c, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$src), + !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>, + Requires<[HasBitCount]> { + let shamt = 0; + let rt = rd; +} + +//===----------------------------------------------------------------------===// +// Instruction definition +//===----------------------------------------------------------------------===// + +/// Arithmetic Instructions (ALU Immediate) +def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, + CPU64Regs>; +def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>; +def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; +def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; +def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>; +def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>; + +/// Arithmetic Instructions (3-Operand, R-Type) +def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>; +def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>; +def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>; +def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>; +def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>; +def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>; +def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>; +def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; + +/// Shift Instructions +def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>; +def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>; +def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>; +def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>; +def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>; +def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>; +def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>; +def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>; +def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>; + +// Rotate Instructions +let Predicates = [HasMips64r2] in { + def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>; + def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr, + imm32_63>; + def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>; +} + +/// Load and Store Instructions +/// aligned +defm LB64 : LoadM64<0x20, "lb", sextloadi8>; +defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>; +defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>; +defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>; +defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>; +defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>; +defm SB64 : StoreM64<0x28, "sb", truncstorei8>; +defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>; +defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>; +defm LD : LoadM64<0x37, "ld", load_a>; +defm SD : StoreM64<0x3f, "sd", store_a>; + +/// unaligned +defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>; +defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>; +defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>; +defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>; +defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>; +defm ULD : LoadM64<0x37, "uld", load_u, 1>; +defm USD : StoreM64<0x3f, "usd", store_u, 1>; + +/// Jump and Branch Instructions +def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; +def BNE64 : CBranch<0x05, 
"bne", setne, CPU64Regs>; +def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; +def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; +def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>; +def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; + +/// Multiply and Divide Instructions. +def DMULT : Mul64<0x1c, "dmult", IIImul>; +def DMULTu : Mul64<0x1d, "dmultu", IIImul>; +def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>; +def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>; + +let Defs = [HI64] in + def MTHI64 : MoveToLOHI64<0x11, "mthi">; +let Defs = [LO64] in + def MTLO64 : MoveToLOHI64<0x13, "mtlo">; + +let Uses = [HI64] in + def MFHI64 : MoveFromLOHI64<0x10, "mfhi">; +let Uses = [LO64] in + def MFLO64 : MoveFromLOHI64<0x12, "mflo">; + +/// Count Leading +def DCLZ : CountLeading64<0x24, "dclz", + [(set CPU64Regs:$dst, (ctlz CPU64Regs:$src))]>; +def DCLO : CountLeading64<0x25, "dclo", + [(set CPU64Regs:$dst, (ctlz (not CPU64Regs:$src)))]>; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +// Small immediates +def : Pat<(i64 immSExt16:$in), + (DADDiu ZERO_64, imm:$in)>; +def : Pat<(i64 immZExt16:$in), + (ORi64 ZERO_64, imm:$in)>; + +// zextloadi32_u +def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>, + Requires<[IsN64]>; +def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>, + Requires<[NotN64]>; + +// hi/lo relocs +def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; + +defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, + ZERO_64>; + +// setcc patterns +defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>; +defm : SetlePats<CPU64Regs, SLT64, SLTu64>; +defm : SetgtPats<CPU64Regs, SLT64, SLTu64>; +defm : SetgePats<CPU64Regs, SLT64, SLTu64>; +defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>; diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 69e03bd..0e82681 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -18,6 +18,7 @@ #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" +#include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" @@ -25,6 +26,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" @@ -33,15 +35,22 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/DebugInfo.h" using namespace llvm; +static bool isUnalignedLoadStore(unsigned Opc) { + return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu || + Opc == Mips::USW || Opc == Mips::USH || + Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 || + Opc == Mips::USW_P8 || Opc == Mips::USH_P8; +} + void 
MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -52,8 +61,21 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { } MipsMCInstLower MCInstLowering(Mang, *MF, *this); + unsigned Opc = MI->getOpcode(); MCInst TmpInst0; MCInstLowering.Lower(MI, TmpInst0); + + // Enclose unaligned load or store with .macro & .nomacro directives. + if (isUnalignedLoadStore(Opc)) { + MCInst Directive; + Directive.setOpcode(Mips::MACRO); + OutStreamer.EmitInstruction(Directive); + OutStreamer.EmitInstruction(TmpInst0); + Directive.setOpcode(Mips::NOMACRO); + OutStreamer.EmitInstruction(Directive); + return; + } + OutStreamer.EmitInstruction(TmpInst0); } @@ -180,7 +202,6 @@ void MipsAsmPrinter::emitFrameDirective() { const char *MipsAsmPrinter::getCurrentABIString() const { switch (Subtarget->getTargetABI()) { case MipsSubtarget::O32: return "abi32"; - case MipsSubtarget::O64: return "abiO64"; case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 @@ -304,6 +325,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MipsII::MO_GOTTPREL: O << "%gottprel("; break; case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break; case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break; + case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break; + case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break; + case MipsII::MO_GOT_DISP: O << "%got_disp("; break; + case MipsII::MO_GOT_PAGE: O << "%got_page("; break; + case MipsII::MO_GOT_OFST: O << "%got_ofst("; break; } switch (MO.getType()) { @@ -424,17 +450,9 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, } // Force static initialization. -static MCInstPrinter *createMipsMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - return new MipsInstPrinter(MAI); -} - extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); - - TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, - createMipsMCInstPrinter); + RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target); + RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget); } diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td index 876f0fc..0ae4ef6 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td +++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td @@ -31,6 +31,55 @@ def RetCC_MipsO32 : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// Mips N32/64 Calling Convention +//===----------------------------------------------------------------------===// + +def CC_MipsN : CallingConv<[ + // FIXME: Handle byval, complex and float double parameters. + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Integer arguments are passed in integer registers. + CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64], + [D12_64, D13_64, D14_64, D15_64, + D16_64, D17_64, D18_64, D19_64]>>, + + // f32 arguments are passed in single precision FP registers. 
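The CCAssignToRegWithShadow entries in CC_MipsN (the i64 one above and the f32/f64 ones that follow) keep the integer and floating-point argument register sequences in lock step, as the N32/N64 convention requires: taking a register from the first list also consumes the register at the same index in the shadow list. A rough standalone model of that shadowing (illustrative only; the vector bookkeeping is a hypothetical stand-in for the real CCState API and is not part of the patch):

#include <cstddef>
#include <vector>

// Returns the index of the register assigned, or -1 if the argument falls
// through to CCAssignToStack. Taking Regs[I] also blocks Shadows[I], e.g.
// an f32 landing in F13 also blocks the shadowed A1_64.
static int assignRegWithShadow(std::vector<bool> &RegsUsed,
                               std::vector<bool> &ShadowsUsed) {
  for (std::size_t I = 0, E = RegsUsed.size(); I != E; ++I)
    if (!RegsUsed[I]) {
      RegsUsed[I] = true;
      ShadowsUsed[I] = true;
      return static_cast<int>(I);
    }
  return -1;
}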
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15, + F16, F17, F18, F19], + [A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + // f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64, + D16_64, D17_64, D18_64, D19_64], + [A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[f32], CCAssignToStack<4, 8>> +]>; + +def RetCC_MipsN : CallingConv<[ + // FIXME: Handle complex and float double return values. + + // i32 are returned in registers V0, V1 + CCIfType<[i32], CCAssignToReg<[V0, V1]>>, + + // i64 are returned in registers V0_64, V1_64 + CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>, + + // f32 are returned in registers F0, F2 + CCIfType<[f32], CCAssignToReg<[F0, F2]>>, + + // f64 are returned in registers D0, D2 + CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>> +]>; + +//===----------------------------------------------------------------------===// // Mips EABI Calling Convention //===----------------------------------------------------------------------===// @@ -77,10 +126,14 @@ def RetCC_MipsEABI : CallingConv<[ //===----------------------------------------------------------------------===// def CC_Mips : CallingConv<[ - CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>> + CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>, + CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>, + CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>> ]>; def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, + CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, + CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp new file mode 100644 index 0000000..9220d9c --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -0,0 +1,245 @@ +//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Mips machine instructions +// into relocatable machine code. 
+// +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include <iomanip> +#endif + +#include "llvm/CodeGen/MachineOperand.h" + +using namespace llvm; + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class MipsCodeEmitter : public MachineFunctionPass { + MipsJITInfo *JTI; + const MipsInstrInfo *II; + const TargetData *TD; + const MipsSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector<MachineConstantPoolEntry> *MCPEs; + const std::vector<MachineJumpTableEntry> *MJTEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo> (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : + MachineFunctionPass(ID), JTI(0), + II((const MipsInstrInfo *) tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) { + } + + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "Mips Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Word); + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + }; +} + +char MipsCodeEmitter::ID = 0; + +bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo(); + II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); + TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData(); + Subtarget = &TM.getSubtarget<MipsSubtarget> (); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = 0; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + JTI->Initialize(MF, IsPIC); + MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getFunction()->getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + // NOTE: This relocations are for static. + uint64_t TSFlags = MI.getDesc().TSFlags; + uint64_t Form = TSFlags & MipsII::FormMask; + if (Form == MipsII::FrmJ) + return Mips::reloc_mips_26; + if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) + && MI.getDesc().isBranch()) + return Mips::reloc_mips_branch; + if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) + return Mips::reloc_mips_hi; + return Mips::reloc_mips_lo; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
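For context on getRelocation above: LUi receives the %hi half of an address and the paired instruction the %lo half, and when those relocations are resolved the high half is rounded to compensate for the sign-extended low half. A minimal sketch of that split, assuming nothing beyond standard C++ (illustrative, not part of the patch):

#include <cstdint>

// %hi/%lo split used when reloc_mips_hi/reloc_mips_lo are resolved:
// (hi16(A) << 16) + (int16_t)lo16(A) reconstructs A (mod 2^32).
static uint16_t hi16(uint32_t Addr) { return (uint16_t)((Addr + 0x8000u) >> 16); }
static uint16_t lo16(uint32_t Addr) { return (uint16_t)(Addr & 0xffffu); }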
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return MipsRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} + +void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast<GlobalValue *>(GV), 0, MayNeedFarStub)); +} + +void MipsCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0, false)); +} + +void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void MipsCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, false)); +} + +void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + +void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + // Skip pseudo instructions. + if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) + return; + + ++NumEmitted; // Keep track of the # of mi's emitted + + switch (MI.getOpcode()) { + default: + emitWordLE(getBinaryCodeForInstr(MI)); + break; + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +void MipsCodeEmitter::emitWordLE(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + MCE.emitWordLE(Word); +} + +/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips +/// code to the specified MCE object. +FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE) { + return new MipsCodeEmitter(TM, JCE); +} + +unsigned MipsCodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const { + // this function will be automatically generated by the CodeEmitterGenerator + // using TableGen + return 0; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index c3a6211..be3b7a0 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with NOPs. +// Simple pass to fills delay slots with useful instructions. 
// //===----------------------------------------------------------------------===// @@ -17,18 +17,31 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); +STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" + " are not NOP."); + +static cl::opt<bool> EnableDelaySlotFiller( + "enable-mips-delay-filler", + cl::init(false), + cl::desc("Fill the Mips delay slots useful instructions."), + cl::Hidden); namespace { struct Filler : public MachineFunctionPass { TargetMachine &TM; const TargetInstrInfo *TII; + MachineBasicBlock::iterator LastFiller; static char ID; Filler(TargetMachine &tm) @@ -47,31 +60,61 @@ namespace { return Changed; } + bool isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate); + + void insertCallUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); + + void insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); + + bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, + unsigned Reg); + + bool delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses); + + bool + findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot, + MachineBasicBlock::iterator &Filler); + + }; char Filler::ID = 0; } // end of anonymous namespace /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. -/// Currently, we fill delay slots with NOPs. We assume there is only one -/// delay slot per delayed instruction. +/// We assume there is only one delay slot per delayed instruction. bool Filler:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) -{ +runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { - const MCInstrDesc& MCid = I->getDesc(); - if (MCid.hasDelaySlot() && - (TM.getSubtarget<MipsSubtarget>().isMips1() || - MCid.isCall() || MCid.isBranch() || MCid.isReturn())) { - MachineBasicBlock::iterator J = I; - ++J; - BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP)); + LastFiller = MBB.end(); + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { ++FilledSlots; Changed = true; - } - } + MachineBasicBlock::iterator D; + + if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { + MBB.splice(llvm::next(I), &MBB, D); + ++UsefulSlots; + } + else + BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + + // Record the filler instruction that filled the delay slot. + // The instruction after it will be visited in the next iteration. 
+ LastFiller = ++I; + } return Changed; + } /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay @@ -80,3 +123,134 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } +bool Filler::findDelayInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator slot, + MachineBasicBlock::iterator &Filler) { + SmallSet<unsigned, 32> RegDefs; + SmallSet<unsigned, 32> RegUses; + + insertDefsUses(slot, RegDefs, RegUses); + + bool sawLoad = false; + bool sawStore = false; + + for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) { + // skip debug value + if (I->isDebugValue()) + continue; + + // Convert to forward iterator. + MachineBasicBlock::iterator FI(llvm::next(I).base()); + + if (I->hasUnmodeledSideEffects() + || I->isInlineAsm() + || I->isLabel() + || FI == LastFiller + || I->getDesc().isPseudo() + // + // Should not allow: + // ERET, DERET or WAIT, PAUSE. Need to add these to instruction + // list. TBD. + ) + break; + + if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) { + insertDefsUses(FI, RegDefs, RegUses); + continue; + } + + Filler = FI; + return true; + } + + return false; +} + +bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, + bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses) { + if (candidate->isImplicitDef() || candidate->isKill()) + return true; + + MCInstrDesc MCID = candidate->getDesc(); + // Loads or stores cannot be moved past a store to the delay slot + // and stores cannot be moved past a load. + if (MCID.mayLoad()) { + if (sawStore) + return true; + sawLoad = true; + } + + if (MCID.mayStore()) { + if (sawStore) + return true; + sawStore = true; + if (sawLoad) + return true; + } + + assert((!MCID.isCall() && !MCID.isReturn()) && + "Cannot put calls or returns in delay slot."); + + for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { + const MachineOperand &MO = candidate->getOperand(i); + unsigned Reg; + + if (!MO.isReg() || !(Reg = MO.getReg())) + continue; // skip + + if (MO.isDef()) { + // check whether Reg is defined or used before delay slot. + if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) + return true; + } + if (MO.isUse()) { + // check whether Reg is defined before delay slot. + if (IsRegInSet(RegDefs, Reg)) + return true; + } + } + return false; +} + +// Insert Defs and Uses of MI into the sets RegDefs and RegUses. +void Filler::insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses) { + // If MI is a call or return, just examine the explicit non-variadic operands. + MCInstrDesc MCID = MI->getDesc(); + unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() : + MI->getNumOperands(); + + // Add RA to RegDefs to prevent users of RA from going into delay slot. + if (MCID.isCall()) + RegDefs.insert(Mips::RA); + + for (unsigned i = 0; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + unsigned Reg; + + if (!MO.isReg() || !(Reg = MO.getReg())) + continue; + + if (MO.isDef()) + RegDefs.insert(Reg); + else if (MO.isUse()) + RegUses.insert(Reg); + } +} + +//returns true if the Reg or its alias is in the RegSet. 
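The hazard test implemented by delayHasHazard and insertDefsUses above reduces to a def/use intersection against everything the delayed instruction (plus any already-rejected candidates) defines or uses. A simplified standalone model of that rule (illustrative, not the pass itself):

#include <set>

// DefsSoFar/UsesSoFar: registers defined/used by the instruction owning the
// delay slot and by rejected candidates, as accumulated by insertDefsUses.
// A candidate is unsafe if it writes a register already defined or used, or
// reads a register already defined.
static bool candidateHasHazard(const std::set<unsigned> &DefsSoFar,
                               const std::set<unsigned> &UsesSoFar,
                               const std::set<unsigned> &CandDefs,
                               const std::set<unsigned> &CandUses) {
  for (unsigned Reg : CandDefs)
    if (DefsSoFar.count(Reg) || UsesSoFar.count(Reg))
      return true;
  for (unsigned Reg : CandUses)
    if (DefsSoFar.count(Reg))
      return true;
  return false;
}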
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { + if (RegSet.count(Reg)) + return true; + // check Aliased Registers + for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + *Alias; ++Alias) + if (RegSet.count(*Alias)) + return true; + + return false; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index a0f90a0..22d1e47 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -254,9 +254,15 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { } // Restore GP from the saved stack location - if (MipsFI->needGPSaveRestore()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) - .addImm(MFI->getObjectOffset(MipsFI->getGPFI())); + if (MipsFI->needGPSaveRestore()) { + unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI()); + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset); + + if (Offset >= 0x8000) { + BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO)); + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); + } + } } void MipsFrameLowering::emitEpilogue(MachineFunction &MF, @@ -300,13 +306,6 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, } } -void -MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Mips::SP, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - void MipsFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h index 78c78ee..c249756 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { + : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0), + STI(sti) { } bool targetHandlesStackFrameRounding() const; @@ -39,8 +40,6 @@ public: bool hasFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; }; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 90aaeb6..9c831ed 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,9 +86,6 @@ private: // Complex Pattern. bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); - SDNode *SelectLoadFp64(SDNode *N); - SDNode *SelectStoreFp64(SDNode *N); - // getI32Imm - Return a target constant with the specified // value, of type i32. inline SDValue getI32Imm(unsigned Imm) { @@ -114,17 +111,20 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + EVT ValTy = Addr.getValueType(); + unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64; + // if Address is FI, get the TargetFrameIndex. 
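On the CPRESTORE change in MipsFrameLowering::emitPrologue above: 0x8000 is the first offset that no longer fits the signed 16-bit immediate of a MIPS load/store, which is why such offsets are bracketed with .macro/.nomacro so the assembler may expand the access. A one-line statement of that bound (illustrative only):

#include <cstdint>

// MIPS lw/sw (and addiu) immediates are signed 16-bit: [-0x8000, 0x7fff].
static bool fitsSImm16(int64_t Offset) {
  return Offset >= -0x8000 && Offset <= 0x7fff;
}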
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); return true; } // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { if (Addr.getOpcode() == MipsISD::WrapperPIC) { - Base = CurDAG->getRegister(Mips::GP, MVT::i32); + Base = CurDAG->getRegister(GPReg, ValTy); Offset = Addr.getOperand(0); return true; } @@ -133,7 +133,7 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) { - Base = CurDAG->getRegister(Mips::GP, MVT::i32); + Base = CurDAG->getRegister(GPReg, ValTy); Offset = Addr; return true; } @@ -147,11 +147,11 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } @@ -180,134 +180,10 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Offset = CurDAG->getTargetConstant(0, ValTy); return true; } -SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { - MVT::SimpleValueType NVT = - N->getValueType(0).getSimpleVT().SimpleTy; - - if (!Subtarget.isMips1() || NVT != MVT::f64) - return NULL; - - LoadSDNode *LN = cast<LoadSDNode>(N); - if (LN->getExtensionType() != ISD::NON_EXTLOAD || - LN->getAddressingMode() != ISD::UNINDEXED) - return NULL; - - SDValue Chain = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue Offset0, Offset1, Base; - - if (!SelectAddr(N1, Base, Offset0) || - N1.getValueType() != MVT::i32) - return NULL; - - MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); - MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N->getDebugLoc(); - - // The second load should start after for 4 bytes. 
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); - else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), - MVT::i32, - CP->getAlignment(), - CP->getOffset()+4, - CP->getTargetFlags()); - else - return NULL; - - // Choose the offsets depending on the endianess - if (TM.getTargetData()->isBigEndian()) - std::swap(Offset0, Offset1); - - // Instead of: - // ldc $f0, X($3) - // Generate: - // lwc $f0, X($3) - // lwc $f1, X+4($3) - SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Base, Offset0, Chain); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, - MVT::f64, Undef, SDValue(LD0, 0)); - - SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Base, Offset1, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, - MVT::f64, I0, SDValue(LD1, 0)); - - ReplaceUses(SDValue(N, 0), I1); - ReplaceUses(SDValue(N, 1), Chain); - cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1); - cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1); - return I1.getNode(); -} - -SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { - - if (!Subtarget.isMips1() || - N->getOperand(1).getValueType() != MVT::f64) - return NULL; - - SDValue Chain = N->getOperand(0); - - StoreSDNode *SN = cast<StoreSDNode>(N); - if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED) - return NULL; - - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - SDValue Offset0, Offset1, Base; - - if (!SelectAddr(N2, Base, Offset0) || - N1.getValueType() != MVT::f64 || - N2.getValueType() != MVT::i32) - return NULL; - - MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); - MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N->getDebugLoc(); - - // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, - dl, MVT::f32, N1); - SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, - dl, MVT::f32, N1); - - // The second store should start after for 4 bytes. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); - else - return NULL; - - // Choose the offsets depending on the endianess - if (TM.getTargetData()->isBigEndian()) - std::swap(Offset0, Offset1); - - // Instead of: - // sdc $f0, X($3) - // Generate: - // swc $f0, X($3) - // swc $f1, X+4($3) - SDValue Ops0[] = { FPEven, Base, Offset0, Chain }; - Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, - MVT::Other, Ops0, 4), 0); - cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - - SDValue Ops1[] = { FPOdd, Base, Offset1, Chain }; - Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, - MVT::Other, Ops1, 4), 0); - cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - - ReplaceUses(SDValue(N, 0), Chain); - return Chain.getNode(); -} - /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { @@ -364,6 +240,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Mul with two results case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { + assert(Node->getValueType(0) != MVT::i64 && + "64-bit multiplication with two results not handled."); SDValue Op1 = Node->getOperand(0); SDValue Op2 = Node->getOperand(1); @@ -389,21 +267,29 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Special Muls case ISD::MUL: - if (Subtarget.isMips32()) + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32) break; case ISD::MULHS: case ISD::MULHU: { + assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) && + "64-bit MULH* not handled."); + EVT Ty = Node->getValueType(0); SDValue MulOp1 = Node->getOperand(0); SDValue MulOp2 = Node->getOperand(1); - unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + unsigned MulOp = (Opcode == ISD::MULHU ? + Mips::MULTu : + (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT)); SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, MVT::Glue, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); - if (Opcode == ISD::MUL) - return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag); + if (Opcode == ISD::MUL) { + unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + return CurDAG->getMachineNode(Opc, dl, Ty, InFlag); + } else return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); } @@ -417,31 +303,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); - SDValue Undef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); - SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, - MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, - MVT::f64, I0, SDValue(MTC, 0)); - ReplaceUses(SDValue(Node, 0), I1); - return I1.getNode(); + return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, + Zero); } break; } - case ISD::LOAD: - if (SDNode *ResNode = SelectLoadFp64(Node)) - return ResNode; - // Other cases are autogenerated. - break; - - case ISD::STORE: - if (SDNode *ResNode = SelectStoreFp64(Node)) - return ResNode; - // Other cases are autogenerated. - break; - case MipsISD::ThreadPointer: { unsigned SrcReg = Mips::HWR29; unsigned DestReg = Mips::V1; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index b4f4b1b..1932e74 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -35,6 +35,18 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +// If I is a shifted mask, set the size (Size) and the first bit of the +// mask (Pos), and return true. +// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
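As a standalone check of the (Pos, Size) convention in the comment above, a minimal sketch using GCC/Clang builtins rather than LLVM's MathExtras helpers (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// A shifted mask is one contiguous run of ones; Pos is the index of the
// lowest set bit, Size the length of the run.
static bool isShiftedMask32(uint32_t I, unsigned &Pos, unsigned &Size) {
  if (I == 0)
    return false;
  Pos = __builtin_ctz(I);
  Size = __builtin_popcount(I);
  // Contiguous iff shifting the run down to bit 0 yields 2**Size - 1.
  return (I >> Pos) == (Size == 32 ? 0xffffffffu : (1u << Size) - 1u);
}

int main() {
  unsigned Pos, Size;
  assert(isShiftedMask32(0x003ff800, Pos, Size) && Pos == 11 && Size == 11);
  // The AND combine below then turns ((x >> 11) & 0x7ff) into a single ext
  // with pos = 11, size = 11.
  return 0;
}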
+static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { + if (!isUInt<32>(I) || !isShiftedMask_32(I)) + return false; + + Size = CountPopulation_32(I); + Pos = CountTrailingZeros_32(I); + return true; +} + const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { case MipsISD::JmpLink: return "MipsISD::JmpLink"; @@ -61,27 +73,38 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; + case MipsISD::Sync: return "MipsISD::Sync"; + case MipsISD::Ext: return "MipsISD::Ext"; + case MipsISD::Ins: return "MipsISD::Ins"; default: return NULL; } } MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) - : TargetLowering(TM, new MipsTargetObjectFile()) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); + : TargetLowering(TM, new MipsTargetObjectFile()), + Subtarget(&TM.getSubtarget<MipsSubtarget>()), + HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); + if (HasMips64) + addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass); + // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) - if (!Subtarget->isFP64bit()) + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); + else addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + } // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -100,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); @@ -115,6 +139,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. 
setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -126,10 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); - if (!Subtarget->isMips32r2()) + if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); + if (!Subtarget->hasMips64r2()) + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); @@ -159,7 +191,14 @@ MipsTargetLowering(MipsTargetMachine &TM) // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + + setInsertFencesForAtomic(true); if (Subtarget->isSingleFloat()) setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); @@ -180,6 +219,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setMinFunctionAlignment(2); @@ -190,7 +231,12 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionSelectorRegister(Mips::A1); } -MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const { +bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16; +} + +EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -348,7 +394,7 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMadd(N, &DAG)) + if (Subtarget->hasMips32() && SelectMadd(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -360,7 +406,7 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMsub(N, &DAG)) + if (Subtarget->hasMips32() && SelectMsub(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -372,6 +418,9 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + EVT Ty = N->getValueType(0); + unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; + unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; unsigned opc = N->getOpcode() == ISD::SDIVREM ? 
MipsISD::DivRem : MipsISD::DivRemU; DebugLoc dl = N->getDebugLoc(); @@ -383,7 +432,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFLO if (N->hasAnyUseOfValue(0)) { - SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); @@ -393,7 +442,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, - Mips::HI, MVT::i32, InGlue); + HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } @@ -490,6 +539,101 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); } +static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match EXT. + // $dst = and ((sra or srl) $src , pos), (2**size - 1) + // => ext $dst, $src, size, pos + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); + + // Op's first operand must be a shift right. + if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL) + return SDValue(); + + // The second operand of the shift must be an immediate. + uint64_t Pos; + ConstantSDNode *CN; + if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) + return SDValue(); + + Pos = CN->getZExtValue(); + + uint64_t SMPos, SMSize; + // Op's second operand must be a shifted mask. + if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || + !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + return SDValue(); + + // Return if the shifted mask does not start at bit 0 or the sum of its size + // and Pos exceeds the word's size. + if (SMPos != 0 || Pos + SMSize > 32) + return SDValue(); + + return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32, + ShiftRight.getOperand(0), + DAG.getConstant(Pos, MVT::i32), + DAG.getConstant(SMSize, MVT::i32)); +} + +static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match INS. + // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), + // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 + // => ins $dst, $src, size, pos, $src1 + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); + uint64_t SMPos0, SMSize0, SMPos1, SMSize1; + ConstantSDNode *CN; + + // See if Op's first operand matches (and $src1 , mask0). + if (And0.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) || + !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) + return SDValue(); + + // See if Op's second operand matches (and (shl $src, pos), mask1). + if (And1.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || + !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + return SDValue(); + + // The shift masks must have the same position and size. 
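The or-of-ands shape PerformORCombine is matching (the check is completed just below) is the plain C form of a bitfield insert, which is what a single ins performs. An illustrative equivalence, not part of the patch:

#include <cassert>
#include <cstdint>

// dst = (src1 & ~mask1) | ((src << pos) & mask1), mask1 = (2**size - 1) << pos:
// insert the low 'size' bits of src into src1 at bit position 'pos'.
static uint32_t insertBits(uint32_t src1, uint32_t src,
                           unsigned pos, unsigned size) {
  uint32_t ones = (size == 32) ? 0xffffffffu : (1u << size) - 1u;
  uint32_t mask1 = ones << pos;
  return (src1 & ~mask1) | ((src << pos) & mask1);
}

int main() {
  // Insert the low 4 bits of 0xB into 0xFFFF00FF at position 8.
  assert(insertBits(0xFFFF00FFu, 0xBu, 8, 4) == 0xFFFF0BFFu);
  return 0;
}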
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + if (Shl.getOpcode() != ISD::SHL) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + if (Shamt != SMPos0) + return SDValue(); + + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32, + Shl.getOperand(0), + DAG.getConstant(SMPos0, MVT::i32), + DAG.getConstant(SMSize0, MVT::i32), + And0.getOperand(0)); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -506,6 +650,10 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); + case ISD::AND: + return PerformANDCombine(N, DAG, DCI, Subtarget); + case ISD::OR: + return PerformORCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -527,6 +675,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); } return SDValue(); } @@ -733,13 +883,13 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + unsigned OldVal = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Oldval = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -758,61 +908,38 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // thisMBB: // ... - // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0) // fallthrough --> loopMBB - - // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. - - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. 
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr).addFrameIndex(fi).addImm(0); - } BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); // loopMBB: // ll oldval, 0(ptr) - // or dest, $0, oldval - // <binop> tmp1, oldval, incr - // sc tmp1, 0(ptr) - // beq tmp1, $0, loopMBB + // <binop> storeval, oldval, incr + // sc success, storeval, 0(ptr) + // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { - // and tmp2, oldval, incr - // nor tmp1, $0, tmp2 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + // and andres, oldval, incr + // nor storeval, $0, andres + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal) + .addReg(Mips::ZERO).addReg(AndRes); } else if (BinOpcode) { - // <binop> tmp1, oldval, incr - BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr); + // <binop> storeval, oldval, incr + BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { - // lw tmp2, fi(sp) // load incr from stack - // or tmp1, $zero, tmp2 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + StoreVal = Incr; } - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. 
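// What the expanded LL/SC loop above implements for the word-sized case,
// sketched with GCC/Clang __sync builtins instead of LL/SC (illustrative only;
// "+" stands in for whichever <binop> was selected, and the CAS retry mirrors
// the sc/beq back-edge).
#include <cstdint>

uint32_t atomicBinOpWord(volatile uint32_t *Ptr, uint32_t Incr) {
  uint32_t OldVal, StoreVal;
  do {
    OldVal = *Ptr;                 // ll   oldval, 0(ptr)
    StoreVal = OldVal + Incr;      // <binop> storeval, oldval, incr
  } while (!__sync_bool_compare_and_swap(Ptr, OldVal, StoreVal));
                                   // sc storeval, 0(ptr); beq $0 -> retry
  return OldVal;                   // the result of the atomic op is the old value
}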
- return BB; + return exitMBB; } MachineBasicBlock * @@ -833,33 +960,34 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Newval = RegInfo.createVirtualRegister(RC); - unsigned Oldval = RegInfo.createVirtualRegister(RC); + unsigned NewVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned Incr2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); - unsigned Tmp10 = RegInfo.createVirtualRegister(RC); - unsigned Tmp11 = RegInfo.createVirtualRegister(RC); - unsigned Tmp12 = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned BinOpRes = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loopMBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. 
@@ -868,111 +996,104 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,incr,255 - // sll incr2,tmp4,shift - // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0) - - // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. + // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - if (BinOpcode != Mips::SUBu) { - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift); - } else { - BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift); - } + BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. 
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr2).addFrameIndex(fi).addImm(0); - } - BB->addSuccessor(loopMBB); + // atomic.load.binop + // loopMBB: + // ll oldval,0(alignedaddr) + // binop binopres,oldval,incr2 + // and newval,binopres,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + + // atomic.swap // loopMBB: - // ll oldval,0(addr) - // binop tmp7,oldval,incr2 - // and newval,tmp7,mask - // and tmp8,oldval,mask2 - // or tmp9,tmp8,newval - // sc tmp9,0(addr) - // beq tmp9,$0,loopMBB + // ll oldval,0(alignedaddr) + // and newval,incr2,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { - // and tmp6, oldval, incr2 - // nor tmp7, $0, tmp6 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); - } else if (BinOpcode == Mips::SUBu) { - // addu tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2); + // and andres, oldval, incr2 + // nor binopres, $0, andres + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes) + .addReg(Mips::ZERO).addReg(AndRes); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else if (BinOpcode) { - // <binop> tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2); - } else { - // lw tmp6, fi(sp) // load incr2 from stack - // or tmp7, $zero, tmp6 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); + // <binop> binopres, oldval, incr2 + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); + } else {// atomic.swap + // and newval, incr2, mask + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } - BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal0).addReg(NewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // and tmp10,oldval,mask - // srl tmp11,tmp10,shift - // sll tmp12,tmp11,24 - // sra dest,tmp12,24 - BB = exitMBB; + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); + + // sinkMBB: + // and 
maskedoldval1,oldval,mask + // srl srlres,maskedoldval1,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp12).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12) - .addReg(Tmp11).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11) - .addReg(Tmp10).addReg(Shift); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10) - .addReg(Oldval).addReg(Mask); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal1); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -989,11 +1110,10 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned OldVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1012,26 +1132,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - int fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - // thisMBB: // ... - // sw newval, fi(sp) // store newval to stack // fallthrough --> loop1MBB - - // Note: storing newval to stack before the loop and then loading it from - // stack in block loop2MBB is necessary to prevent MachineLICM pass to - // hoist "or" instruction out of the block loop2MBB. 
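// Sketch of the masking scheme shared by the partword expansions
// (EmitAtomicBinaryPartword above and EmitAtomicCmpSwapPartword below): the
// byte or halfword is updated inside its naturally aligned containing word so
// that LL/SC can operate on a full word. Plain C++, little-endian field
// placement shown; the names mirror the virtual registers created above.
#include <cstdint>

uint32_t makeShiftAmt(uintptr_t Ptr) { return (unsigned)(Ptr & 3) * 8; }

uint32_t makeMask(unsigned Size, uint32_t ShiftAmt) {
  uint32_t MaskUpper = (Size == 1) ? 0xff : 0xffff;
  return MaskUpper << ShiftAmt;     // selects the field inside the aligned word
}

// Merge the new field into the old word: (old & ~mask) | (new & mask).
uint32_t mergeField(uint32_t OldVal, uint32_t NewVal, uint32_t Mask) {
  return (OldVal & ~Mask) | (NewVal & Mask);
}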
- - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Newval).addFrameIndex(fi).addImm(0); BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); // loop1MBB: // ll dest, 0(ptr) @@ -1039,27 +1147,20 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB = loop1MBB; BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Dest).addReg(Oldval).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: - // lw tmp2, fi(sp) // load newval from stack - // or tmp1, $0, tmp2 - // sc tmp1, 0(ptr) - // beq tmp1, $0, loop1MBB + // sc success, newval, 0(ptr) + // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(NewVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -1077,36 +1178,39 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned CmpVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval3 = RegInfo.createVirtualRegister(RC); - unsigned Oldval4 = RegInfo.createVirtualRegister(RC); - unsigned Newval2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = 
RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1115,76 +1219,90 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + + // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,oldval,255 - // sll oldval2,tmp4,shift - // andi tmp5,newval,255 - // sll newval2,tmp5,shift + // andi maskedcmpval,cmpval,255 + // sll shiftedcmpval,maskedcmpval,shiftamt + // andi maskednewval,newval,255 + // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 
255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift); - BB->addSuccessor(loop1MBB); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal) + .addReg(CmpVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal) + .addReg(ShiftAmt).addReg(MaskedCmpVal); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal) + .addReg(NewVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal) + .addReg(ShiftAmt).addReg(MaskedNewVal); // loop1MBB: - // ll oldval3,0(addr) - // and oldval4,oldval3,mask - // bne oldval4,oldval2,exitMBB + // ll oldval,0(alginedaddr) + // and maskedoldval0,oldval,mask + // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: - // and tmp6,oldval3,mask2 - // or tmp7,tmp6,newval2 - // sc tmp7,0(addr) - // beq tmp7,$0,loop1MBB + // and maskedoldval1,oldval,mask2 + // or storeval,maskedoldval1,shiftednewval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp7) - .addReg(Tmp7).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal1).addReg(ShiftedNewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - // exitMBB: - // srl tmp8,oldval4,shift - // sll tmp9,tmp8,24 - // sra 
255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift); - BB->addSuccessor(loop1MBB); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal) + .addReg(CmpVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal) + .addReg(ShiftAmt).addReg(MaskedCmpVal); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal) + .addReg(NewVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal) + .addReg(ShiftAmt).addReg(MaskedNewVal); // loop1MBB: - // ll oldval3,0(addr) - // and oldval4,oldval3,mask - // bne oldval4,oldval2,exitMBB + // ll oldval,0(alignedaddr) + // and maskedoldval0,oldval,mask + // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: - // and tmp6,oldval3,mask2 - // or tmp7,tmp6,newval2 - // sc tmp7,0(addr) - // beq tmp7,$0,loop1MBB + // and maskedoldval1,oldval,mask2 + // or storeval,maskedoldval1,shiftednewval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp7) - .addReg(Tmp7).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal1).addReg(ShiftedNewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - // exitMBB: - // srl tmp8,oldval4,shift - // sll tmp9,tmp8,24 - // sra 
dest,tmp9,24 - BB = exitMBB; + // sinkMBB: + // srl srlres,maskedoldval0,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp9).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9) - .addReg(Tmp8).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8) - .addReg(Oldval4).addReg(Shift); + + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal0); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } //===----------------------------------------------------------------------===// @@ -1267,9 +1385,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); @@ -1292,21 +1410,26 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_GOT); - GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA); - SDValue ResNode = DAG.getLoad(MVT::i32, dl, + EVT ValTy = Op.getValueType(); + bool HasGotOfst = (GV->hasInternalLinkage() || + (GV->hasLocalLinkage() && !isa<Function>(GV))); + unsigned GotFlag = IsN64 ? + (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : + MipsII::MO_GOT; + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); + GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); + SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. - if (!GV->hasInternalLinkage() && - (!GV->hasLocalLinkage() || isa<Function>(GV))) + if (!HasGotOfst) return ResNode; - SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); - return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, + IsN64 ? 
MipsII::MO_GOT_OFST : + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo); + return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo); } SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, @@ -1361,11 +1484,11 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); std::pair<SDValue, SDValue> CallResult = LowerCallTo(DAG.getEntryNode(), - (const Type *) Type::getInt32Ty(*DAG.getContext()), + (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); @@ -1557,6 +1680,25 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// TODO: set SType according to the desired memory barrier behavior. +SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op, + SelectionDAG& DAG) const { + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + +SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + // FIXME: Need pseudo-fence for 'singlethread' fences + // FIXME: Set SType for weaker fences where supported/appropriate. + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1679,55 +1821,109 @@ static const unsigned O32IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; +// Return next O32 integer argument register. +static unsigned getNextIntArgReg(unsigned Reg) { + assert((Reg == Mips::A0) || (Reg == Mips::A2)); + return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; +} + // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& Chain, DebugLoc dl, +WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, SmallVector<SDValue, 8>& MemOpChains, int& LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, - MVT PtrType) { - unsigned FirstWord = VA.getLocMemOffset() / 4; - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - unsigned LastWord = FirstWord + NumWords; - unsigned CurWord; - - // copy the first 4 words of byval arg to registers A0 - A3 - for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); - ++CurWord) { + MVT PtrType, bool isLittle) { + unsigned LocMemOffset = VA.getLocMemOffset(); + unsigned Offset = 0; + uint32_t RemainingSize = Flags.getByValSize(); + unsigned ByValAlign = Flags.getByValAlign(); + + // Copy the first 4 words of byval arg to registers A0 - A3. + // FIXME: Use a stricter alignment if it enables better optimization in passes + // run later. 
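// Sketch of the O32 convention applied in the loop that follows: byval words
// that fall in the first 16 bytes of the outgoing argument area are passed in
// $a0-$a3, everything past that goes on the stack. Illustrative only; the
// register names are stand-ins for the Mips::A0..A3 enumerators, and the
// trailing 1-3 byte case is handled separately further below.
#include <cstdio>

static const char *const O32ArgRegs[] = {"a0", "a1", "a2", "a3"};

void mapByValWords(unsigned LocMemOffset, unsigned ByValSize) {
  for (unsigned Off = 0; Off + 4 <= ByValSize; Off += 4, LocMemOffset += 4) {
    if (LocMemOffset < 16)
      std::printf("byval word at offset %u -> $%s\n", Off,
                  O32ArgRegs[LocMemOffset / 4]);
    else
      std::printf("byval word at offset %u -> stack+%u\n", Off, LocMemOffset);
  }
}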
+ for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; + Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); + DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, std::min(ByValAlign, + (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[CurWord]; + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); } - // copy remaining part of byval arg to stack. - if (CurWord < LastWord) { - unsigned SizeInBytes = (LastWord - CurWord) * 4; - SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); - LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(SizeInBytes, MVT::i32), - /*Align*/4, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + if (RemainingSize == 0) + return; + + // If there still is a register available for argument passing, write the + // remaining part of the structure to it using subword loads and shifts. + if (LocMemOffset < 4 * 4) { + assert(RemainingSize <= 3 && RemainingSize >= 1 && + "There must be one to three bytes remaining."); + unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )4); + SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSize * 8), false, + false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // If target is big endian, shift it to the most significant half-word or + // byte. + if (!isLittle) + LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, + DAG.getConstant(32 - LoadSize * 8, MVT::i32)); + + Offset += LoadSize; + RemainingSize -= LoadSize; + + // Read second subword if necessary. + if (RemainingSize != 0) { + assert(RemainingSize == 1 && "There must be one byte remaining."); + LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )2); + SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::i8, false, false, Alignment); + MemOpChains.push_back(Subword.getValue(1)); + // Insert the loaded byte to LoadVal. + // FIXME: Use INS if supported by target. + unsigned ShiftAmt = isLittle ? 16 : 8; + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, + DAG.getConstant(ShiftAmt, MVT::i32)); + LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); + } + + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; + RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); + return; } + + // Create a fixed object on stack at offset LocMemOffset and copy + // remaining part of byval arg to it using memcpy. 
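// Sketch of the subword packing done above when 1-3 trailing bytes of the
// byval argument still fit in an argument register: they are assembled into a
// single 32-bit value with zero-extending loads, shifts and an or, honouring
// the target's endianness. Plain C++ for illustration; the byte construction
// is written out explicitly so the result does not depend on the host.
#include <cstdint>

uint32_t packTrailingBytes(const uint8_t *Src, unsigned RemainingSize,
                           bool IsLittle) {
  unsigned LoadSize = (RemainingSize == 3) ? 2 : RemainingSize;   // 1 or 2
  uint32_t Val = Src[0];
  if (LoadSize == 2)
    Val = IsLittle ? (Val | (uint32_t(Src[1]) << 8)) : ((Val << 8) | Src[1]);
  if (!IsLittle)
    Val <<= 32 - LoadSize * 8;        // big endian: keep data in the MS bits
  if (RemainingSize == 3)             // third byte needs a second load
    Val |= uint32_t(Src[2]) << (IsLittle ? 16 : 8);
  return Val;
}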
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. SDValue -MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, +MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -1757,8 +1953,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset, - true)); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue Chain, CallSeqStart, ByValChain; + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); + ByValChain = InChain; // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. @@ -1818,8 +2019,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); - RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); + unsigned LocRegLo = VA.getLocReg(); + unsigned LocRegHigh = getNextIntArgReg(LocRegLo); + RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); + RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } @@ -1852,8 +2055,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, "No support for ByVal args by ABIs other than O32 yet."); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, - VA, Flags, getPointerTy()); + WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, + DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; } @@ -1875,6 +2078,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (LastFI) MipsFI->extendOutArgFIRange(FirstFI, LastFI); + // If a memcpy has been created to copy a byval arg to a stack, replace the + // chain input of CallSeqStart with ByValChain. + if (InChain != ByValChain) + DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, + NextStackOffsetVal); + // Transform all store nodes into one single node because all store // nodes are independent of each other. 
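// Sketch of how an f64 argument is split over an even/odd O32 register pair
// (A0/A1 or A2/A3, paired via getNextIntArgReg above): the two 32-bit halves
// are extracted and swapped on big-endian targets. Plain C++, using the
// host's IEEE-754 layout as a stand-in for the ExtractElementF64 nodes.
#include <cstdint>
#include <cstring>
#include <utility>

void splitF64Arg(double Arg, bool IsLittle, uint32_t &LocRegLo,
                 uint32_t &LocRegHi) {
  uint64_t Bits;
  std::memcpy(&Bits, &Arg, sizeof Bits);
  uint32_t Lo = uint32_t(Bits), Hi = uint32_t(Bits >> 32);
  if (!IsLittle)
    std::swap(Lo, Hi);    // big endian: high word goes in the lower register
  LocRegLo = Lo;
  LocRegHi = Hi;
}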
if (!MemOpChains.empty()) @@ -2071,12 +2280,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (RegVT == MVT::i32) RC = Mips::CPURegsRegisterClass; + else if (RegVT == MVT::i64) + RC = Mips::CPU64RegsRegisterClass; else if (RegVT == MVT::f32) RC = Mips::FGR32RegisterClass; - else if (RegVT == MVT::f64) { - if (!Subtarget->isSingleFloat()) - RC = Mips::AFGR64RegisterClass; - } else + else if (RegVT == MVT::f64) + RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass; + else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on @@ -2105,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), - VA.getLocReg()+1, RC); + getNextIntArgReg(ArgReg), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); if (!Subtarget->isLittle()) std::swap(ArgValue, ArgValue2); @@ -2313,7 +2523,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index bda26a2..4be3fed5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -81,7 +81,12 @@ namespace llvm { WrapperPIC, - DynAlloc + DynAlloc, + + Sync, + + Ext, + Ins }; } @@ -93,6 +98,8 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); + virtual bool allowsUnalignedMemoryAccesses (EVT VT) const; + /// LowerOperation - Provide custom lowering hooks for some operations. 
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -101,13 +108,14 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - get the ISD::SETCC result ValueType - MVT::SimpleValueType getSetCCResultType(EVT VT) const; + EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: // Subtarget Info const MipsSubtarget *Subtarget; - + + bool HasMips64, IsN64; // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, @@ -128,6 +136,8 @@ namespace llvm { SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td index 021c167..2fb9d18 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -27,7 +27,7 @@ def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, - SDTCisInt<2>]>; + SDTCisVT<2, i32>]>; def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, @@ -35,12 +35,11 @@ def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>]>; def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, f64>, - SDTCisVT<0, i32>]>; + SDTCisVT<2, i32>]>; def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>; def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>; -def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>; def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, [SDNPHasChain, SDNPOptInGlue]>; def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>; @@ -55,10 +54,10 @@ let PrintMethod = "printFCCOperand" in // Feature predicates. //===----------------------------------------------------------------------===// -def In32BitMode : Predicate<"!Subtarget.isFP64bit()">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">; def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; -def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates @@ -74,97 +73,87 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// -multiclass FFR1_1<bits<6> funct, string asmstr> -{ - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s\t$fd, $fs"), []>; - - def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>; +// FP load. 
+class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, + Operand MemOpnd>: + FFI<op, (outs RC:$ft), (ins MemOpnd:$base), + !strconcat(opstr, "\t$ft, $base"), [(set RC:$ft, (FOp addr:$base))]>; + +// FP store. +class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, + Operand MemOpnd>: + FFI<op, (outs), (ins RC:$ft, MemOpnd:$base), + !strconcat(opstr, "\t$ft, $base"), [(store RC:$ft, addr:$base)]>; + +// Instructions that convert an FP value to 32-bit fixed point. +multiclass FFR1_W_M<bits<6> funct, string opstr> { + def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>; + def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>, + Requires<[NotFP64bit]>; + def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>, + Requires<[IsFP64bit]>; } -multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp> -{ - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s\t$fd, $fs"), - [(set FGR32:$fd, (FOp FGR32:$fs))]>; - - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d\t$fd, $fs"), - [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>; +// Instructions that convert an FP value to 64-bit fixed point. +let Predicates = [IsFP64bit] in +multiclass FFR1_L_M<bits<6> funct, string opstr> { + def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>; + def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>; } -class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc, - RegisterClass RcDst, string asmstr>: - FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), - !strconcat(asmstr, "\t$fd, $fs"), []>; - +// FP-to-FP conversion instructions. +multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> { + def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>; + def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>, + Requires<[NotFP64bit]>; + def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>, + Requires<[IsFP64bit]>; +} -multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> { +multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { let isCommutable = isComm in { - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), - (ins FGR32:$fs, FGR32:$ft), - !strconcat(asmstr, ".s\t$fd, $fs, $ft"), - [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>; - - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), - (ins AFGR64:$fs, AFGR64:$ft), - !strconcat(asmstr, ".d\t$fd, $fs, $ft"), - [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>, - Requires<[In32BitMode]>; + def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>; + def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>, + Requires<[NotFP64bit]>; + def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>, + Requires<[IsFP64bit]>; } } //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// +defm ROUND_W : FFR1_W_M<0xc, "round">; +defm ROUND_L : FFR1_L_M<0x8, "round">; +defm TRUNC_W : FFR1_W_M<0xd, "trunc">; +defm TRUNC_L : FFR1_L_M<0x9, "trunc">; +defm CEIL_W : FFR1_W_M<0xe, "ceil">; +defm CEIL_L : FFR1_L_M<0xa, "ceil">; +defm FLOOR_W : FFR1_W_M<0xf, "floor">; +defm FLOOR_L : FFR1_L_M<0xb, "floor">; +defm CVT_W : FFR1_W_M<0x24, "cvt">; +defm CVT_L : FFR1_L_M<0x25, "cvt">; + +def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; + +let Predicates = [NotFP64bit] in { + def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; + def 
CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; + def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; +} -let ft = 0 in { - defm FLOOR_W : FFR1_1<0b001111, "floor.w">; - defm CEIL_W : FFR1_1<0b001110, "ceil.w">; - defm ROUND_W : FFR1_1<0b001100, "round.w">; - defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; - defm CVTW : FFR1_1<0b100100, "cvt.w">; - - defm FABS : FFR1_2<0b000101, "abs", fabs>; - defm FNEG : FFR1_2<0b000111, "neg", fneg>; - defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>; - - /// Convert to Single Precison - def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">; - - let Predicates = [IsNotSingleFloat] in { - /// Ceil to long signed integer - def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">; - def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">; - - /// Round to long signed integer - def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">; - def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">; - - /// Floor to long signed integer - def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">; - def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">; - - /// Trunc to long signed integer - def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">; - def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">; - - /// Convert to long signed integer - def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; - def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; - - /// Convert to Double Precison - def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; - def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; - def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; - - /// Convert to Single Precison - def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">; - def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; - } +let Predicates = [IsFP64bit] in { + def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; + def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; + def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; + def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>; + def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; } +defm FABS : FFR1P_M<0x5, "abs", fabs>; +defm FNEG : FFR1P_M<0x7, "neg", fneg>; +defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>; + // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. 
// When defining instructions, we reference all 32-bit registers, @@ -178,37 +167,46 @@ let fd = 0 in { "ctc1\t$fs, $rt", []>; def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs), - "mfc1\t$rt, $fs", []>; + "mfc1\t$rt, $fs", + [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>; def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt), - "mtc1\t$rt, $fs", []>; + "mtc1\t$rt, $fs", + [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>; } -def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - "mov.s\t$fd, $fs", []>; -def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - "mov.d\t$fd, $fs", []>; +def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; +def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, + Requires<[NotFP64bit]>; +def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, + Requires<[IsFP64bit]>; /// Floating Point Memory Instructions -let Predicates = [IsNotSingleFloat, IsNotMipsI] in { - def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), - "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; - - def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), - "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>; +let Predicates = [IsN64] in { + def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>; + def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>; + def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>; + def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>; } -// LWC1 and SWC1 can always be emitted with odd registers. -def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr", - [(set FGR32:$ft, (load addr:$addr))]>; -def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), - "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>; +let Predicates = [NotN64] in { + def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>; + def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>; + let Predicates = [HasMips64] in { + def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>; + def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>; + } + let Predicates = [NotMips64] in { + def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>; + def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>; + } +} /// Floating-point Aritmetic -defm FADD : FFR1_4<0x10, "add", fadd, 1>; -defm FDIV : FFR1_4<0x03, "div", fdiv>; -defm FMUL : FFR1_4<0x02, "mul", fmul, 1>; -defm FSUB : FFR1_4<0x01, "sub", fsub>; +defm FADD : FFR2P_M<0x10, "add", fadd, 1>; +defm FDIV : FFR2P_M<0x03, "div", fdiv>; +defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>; +defm FSUB : FFR2P_M<0x01, "sub", fsub>; //===----------------------------------------------------------------------===// // Floating Point Branch Codes @@ -217,8 +215,6 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>; // They must be kept in synch. 
def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def MIPS_BRANCH_FL : PatLeaf<(i32 2)>; -def MIPS_BRANCH_TL : PatLeaf<(i32 3)>; /// Floating Point Branch of False/True (Likely) let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in @@ -228,8 +224,6 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">; def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">; -def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">; -def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions @@ -254,7 +248,7 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare -let hasDelaySlot = 1, Defs=[FCR31] in { +let Defs=[FCR31] in { def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), "c.$cc.s\t$fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>; @@ -262,7 +256,7 @@ let hasDelaySlot = 1, Defs=[FCR31] in { def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), "c.$cc.d\t$fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>, - Requires<[In32BitMode]>; + Requires<[NotFP64bit]>; } @@ -280,7 +274,7 @@ class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func, def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">; def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">; def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">; } @@ -288,7 +282,7 @@ let Predicates = [In32BitMode] in { defm : MovzPats<FGR32, MOVZ_S>; defm : MovnPats<FGR32, MOVN_S>; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { defm : MovzPats<AFGR64, MOVZ_D>; defm : MovnPats<AFGR64, MOVN_D>; } @@ -313,7 +307,7 @@ def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">; def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; } @@ -353,22 +347,16 @@ def fpimm0neg : PatLeaf<(fpimm), [{ }]>; def : Pat<(f32 fpimm0), (MTC1 ZERO)>; -def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>; +def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; -def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>; -def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>; +def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; +def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; -def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>; +def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; -def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>; -def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>; - -let Predicates = [In32BitMode] in { - def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>; +let Predicates = [NotFP64bit] in { + def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; + def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; } -// MipsFPRound is only emitted for MipsI targets. 
-def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>; - diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index 9f55fb3..d246a26 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -44,7 +44,9 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo>; + MipsInst<outs, ins, asmstr, pattern, IIPseudo> { + let isPseudo = 1; +} //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> @@ -88,6 +90,21 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, let Inst{15-0} = imm16; } +class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin>: + MipsInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rs; + bits<5> rt; + bits<16> imm16; + + let opcode = op; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + //===----------------------------------------------------------------------===// // Format J instruction class in Mips : <|opcode|address|> //===----------------------------------------------------------------------===// @@ -224,4 +241,27 @@ class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, let Inst{15-11} = fs; let Inst{10-6} = fd; let Inst{5-0} = 17; -}
\ No newline at end of file +} + +// FP unary instructions without patterns. +class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, + RegisterClass DstRC, RegisterClass SrcRC> : + FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> { + let ft = 0; +} + +// FP unary instructions with patterns. +class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, + RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> : + FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), + [(set DstRC:$fd, (OpNode SrcRC:$fs))]> { + let ft = 0; +} + +class FFR2P<bits<6> funct, bits<5> fmt, string opstr, + string fmtstr, RegisterClass RC, SDNode OpNode> : + FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"), + [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>; diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 0a7a7f2..559943a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -17,8 +17,8 @@ #include "InstPrinter/MipsInstPrinter.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR @@ -28,7 +28,8 @@ using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), + RI(*TM.getSubtargetImpl(), *this) {} const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { @@ -47,8 +48,12 @@ static bool isZeroImm(const MachineOperand &op) { unsigned MipsInstrInfo:: isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { - if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || - (MI->getOpcode() == Mips::LDC1)) { + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -68,8 +73,12 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const unsigned MipsInstrInfo:: isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { - if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || - (MI->getOpcode() == Mips::SDC1)) { + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -94,70 +103,63 @@ copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - bool DestCPU = Mips::CPURegsRegClass.contains(DestReg); - bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg); - - // CPU-CPU is the most common. 
- if (DestCPU && SrcCPU) { - BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } + unsigned Opc = 0, ZeroReg = 0; - // Copy to CPU from other registers. - if (DestCPU) { - if (Mips::CCRRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MFC1; else if (SrcReg == Mips::HI) - BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg); + Opc = Mips::MFHI, SrcReg = 0; else if (SrcReg == Mips::LO) - BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg); - else - llvm_unreachable("Copy to CPU from invalid register"); - return; + Opc = Mips::MFLO, SrcReg = 0; } - - // Copy to other registers from CPU. - if (SrcCPU) { + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) - BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) - BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MTC1; else if (DestReg == Mips::HI) - BuildMI(MBB, I, DL, get(Mips::MTHI)) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MTHI, DestReg = 0; else if (DestReg == Mips::LO) - BuildMI(MBB, I, DL, get(Mips::MTLO)) - .addReg(SrcReg, getKillRegState(KillSrc)); - else - llvm_unreachable("Copy from CPU to invalid register"); - return; + Opc = Mips::MTLO, DestReg = 0; } - - if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; } - - if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. 
+ if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; } - if (Mips::CCRRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - llvm_unreachable("Cannot copy registers"); + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); } void MipsInstrInfo:: @@ -167,31 +169,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (RC == Mips::CPU64RegsRegisterClass) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; else if (RC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - else if (RC == Mips::AFGR64RegisterClass) { - if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { - BuildMI(MBB, I, DL, get(Mips::SDC1)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - } else { - const TargetRegisterInfo *TRI = - MBB.getParent()->getTarget().getRegisterInfo(); - const unsigned *SubSet = TRI->getSubRegisters(SrcReg); - BuildMI(MBB, I, DL, get(Mips::SWC1)) - .addReg(SubSet[0], getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - BuildMI(MBB, I, DL, get(Mips::SWC1)) - .addReg(SubSet[1], getKillRegState(isKill)) - .addFrameIndex(FI).addImm(4); - } - } else - llvm_unreachable("Register class not handled!"); + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (RC == Mips::AFGR64RegisterClass) + Opc = Mips::SDC1; + else if (RC == Mips::FGR64RegisterClass) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); } void MipsInstrInfo:: @@ -202,25 +195,21 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0); + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (RC == Mips::CPU64RegsRegisterClass) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; else if (RC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0); - else if (RC == Mips::AFGR64RegisterClass) { - if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { - BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0); - } else { - const TargetRegisterInfo *TRI = - MBB.getParent()->getTarget().getRegisterInfo(); - const unsigned *SubSet = TRI->getSubRegisters(DestReg); - BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) - .addFrameIndex(FI).addImm(0); - BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1]) - .addFrameIndex(FI).addImm(4); - } - } else - llvm_unreachable("Register class not handled!"); + Opc = IsN64 ? 
Mips::LWC1_P8 : Mips::LWC1; + else if (RC == Mips::AFGR64RegisterClass) + Opc = Mips::LDC1; + else if (RC == Mips::FGR64RegisterClass) + Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0); } MachineInstr* @@ -237,9 +226,12 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, //===----------------------------------------------------------------------===// static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0; + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? + Opc : 0; } /// GetOppositeBranchOpc - Return the inverse of the specified @@ -248,14 +240,20 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ : return Mips::BNE; - case Mips::BNE : return Mips::BEQ; - case Mips::BGTZ : return Mips::BLEZ; - case Mips::BGEZ : return Mips::BLTZ; - case Mips::BLTZ : return Mips::BGEZ; - case Mips::BLEZ : return Mips::BGTZ; - case Mips::BC1T : return Mips::BC1F; - case Mips::BC1F : return Mips::BC1T; + case Mips::BEQ : return Mips::BNE; + case Mips::BNE : return Mips::BEQ; + case Mips::BGTZ : return Mips::BLEZ; + case Mips::BGEZ : return Mips::BLTZ; + case Mips::BLTZ : return Mips::BGEZ; + case Mips::BLEZ : return Mips::BGTZ; + case Mips::BEQ64 : return Mips::BNE64; + case Mips::BNE64 : return Mips::BEQ64; + case Mips::BGTZ64 : return Mips::BLEZ64; + case Mips::BGEZ64 : return Mips::BLTZ64; + case Mips::BLTZ64 : return Mips::BGEZ64; + case Mips::BLEZ64 : return Mips::BGTZ64; + case Mips::BC1T : return Mips::BC1F; + case Mips::BC1F : return Mips::BC1T; } } diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h index 4421c48..271d248 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -72,12 +72,47 @@ namespace MipsII { /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from // the thread pointer (Local Exec TLS). MO_TPREL_HI, - MO_TPREL_LO + MO_TPREL_LO, + + // N32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Mips instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. 
+ FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + + FormMask = 15 }; } class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; + bool IsN64; const MipsRegisterInfo RI; public: explicit MipsInstrInfo(MipsTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index d1a0587..06b7de7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -34,13 +34,20 @@ def SDT_MipsMAddMSub : SDTypeProfile<0, 4, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>]>; def SDT_MipsDivRem : SDTypeProfile<0, 2, - [SDTCisVT<0, i32>, + [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, iPTR>]>; +def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>; +def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, @@ -106,6 +113,11 @@ def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>; def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, [SDNPHasChain, SDNPInGlue]>; +def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; + +def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; +def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. //===----------------------------------------------------------------------===// @@ -113,8 +125,13 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">; def HasBitCount : Predicate<"Subtarget.hasBitCount()">; def HasSwap : Predicate<"Subtarget.hasSwap()">; def HasCondMov : Predicate<"Subtarget.hasCondMov()">; -def IsMips32 : Predicate<"Subtarget.isMips32()">; -def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">; +def HasMips32 : Predicate<"Subtarget.hasMips32()">; +def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">; +def HasMips64 : Predicate<"Subtarget.hasMips64()">; +def NotMips64 : Predicate<"!Subtarget.hasMips64()">; +def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; +def IsN64 : Predicate<"Subtarget.isABI_N64()">; +def NotN64 : Predicate<"!Subtarget.isABI_N64()">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
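
The MipsII::Frm* values introduced in MipsInstrInfo.h above end with FormMask = 15, which suggests the format tag is meant to live in the low bits of an instruction's TSFlags and be recovered by masking. A minimal sketch of that usage, assuming the TableGen backend places the Frm* value in the low four bits of MCInstrDesc::TSFlags (the getMipsInstrFormat helper is illustrative, not part of this diff):

#include "llvm/MC/MCInstrDesc.h"
#include "MipsInstrInfo.h"   // for the MipsII::Frm* enum and FormMask

// Hypothetical accessor: masks TSFlags with MipsII::FormMask to recover the
// encoding form (FrmR, FrmI, FrmJ, FrmFR, FrmFI or FrmOther) of an opcode.
static unsigned getMipsInstrFormat(const llvm::MCInstrDesc &Desc) {
  return Desc.TSFlags & llvm::MipsII::FormMask;
}

Whether the bits are actually wired up this way depends on MipsInstrFormats.td copying the form value into TSFlags, which this excerpt does not show.
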
@@ -124,6 +141,7 @@ def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">; def brtarget : Operand<OtherVT>; def calltarget : Operand<i32>; def simm16 : Operand<i32>; +def simm16_64 : Operand<i64>; def shamt : Operand<i32>; // Unsigned Operand @@ -137,6 +155,11 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); } +def mem64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPU64Regs, simm16_64); +} + def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); @@ -177,36 +200,85 @@ def immZExt5 : PatLeaf<(imm), [{ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// +// Pattern fragment for load/store +//===----------------------------------------------------------------------===// +class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment(); +}]>; + +class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment(); +}]>; + +class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), + (Node node:$val, node:$ptr), [{ + StoreSDNode *SD = cast<StoreSDNode>(N); + return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment(); +}]>; + +class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), + (Node node:$val, node:$ptr), [{ + StoreSDNode *SD = cast<StoreSDNode>(N); + return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment(); +}]>; + +// Load/Store PatFrags. +def sextloadi16_a : AlignedLoad<sextloadi16>; +def zextloadi16_a : AlignedLoad<zextloadi16>; +def extloadi16_a : AlignedLoad<extloadi16>; +def load_a : AlignedLoad<load>; +def sextloadi32_a : AlignedLoad<sextloadi32>; +def zextloadi32_a : AlignedLoad<zextloadi32>; +def extloadi32_a : AlignedLoad<extloadi32>; +def truncstorei16_a : AlignedStore<truncstorei16>; +def store_a : AlignedStore<store>; +def truncstorei32_a : AlignedStore<truncstorei32>; +def sextloadi16_u : UnalignedLoad<sextloadi16>; +def zextloadi16_u : UnalignedLoad<zextloadi16>; +def extloadi16_u : UnalignedLoad<extloadi16>; +def load_u : UnalignedLoad<load>; +def sextloadi32_u : UnalignedLoad<sextloadi32>; +def zextloadi32_u : UnalignedLoad<zextloadi32>; +def extloadi32_u : UnalignedLoad<extloadi32>; +def truncstorei16_u : UnalignedStore<truncstorei16>; +def store_u : UnalignedStore<store>; +def truncstorei32_u : UnalignedStore<truncstorei32>; + +//===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// -// Arithmetic 3 register operands -class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, - InstrItinClass itin, bit isComm = 0>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> { +// Arithmetic and logical instructions with 3 register operands. 
+class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, + InstrItinClass itin, RegisterClass RC, bit isComm = 0>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> { + let shamt = 0; let isCommutable = isComm; } class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm, - bit isComm = 0>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> { + InstrItinClass itin, RegisterClass RC, bit isComm = 0>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> { + let shamt = 0; let isCommutable = isComm; } -// Arithmetic 2 register operands -class ArithI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type> : - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; +// Arithmetic and logical instructions with 2 register operands. +class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rt, $rs, $i"), + [(set RC:$rt, (OpNode RC:$rs, imm_type:$i))], IIAlu>; class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type> : - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>; + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rt, $rs, $i"), [], IIAlu>; // Arithmetic Multiply ADD/SUB let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in @@ -214,92 +286,134 @@ class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> : FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), !strconcat(instr_asm, "\t$rs, $rt"), [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> { + let rd = 0; + let shamt = 0; let isCommutable = isComm; } // Logical -let isCommutable = 1 in -class LogicR<bits<6> func, string instr_asm, SDNode OpNode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; - -class LogicI<bits<6> op, string instr_asm, SDNode OpNode>: - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>; - -let isCommutable = 1 in -class LogicNOR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; +class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> { + let shamt = 0; + let isCommutable = 1; +} // Shifts class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, SDNode OpNode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> { + FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt), + !strconcat(instr_asm, "\t$rd, $rt, $shamt"), 
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> { let rs = _rs; } -class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm, +class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm, SDNode OpNode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> { - let shamt = _shamt; + FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$rd, $rt, $rs"), + [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> { + let shamt = isRotate; } // Load Upper Imediate class LoadUpper<bits<6> op, string instr_asm>: - FI< op, - (outs CPURegs:$dst), - (ins uimm16:$imm), - !strconcat(instr_asm, "\t$dst, $imm"), - [], IIAlu>; + FI<op, (outs CPURegs:$rt), (ins uimm16:$imm), + !strconcat(instr_asm, "\t$rt, $imm"), [], IIAlu> { + let rs = 0; +} // Memory Load/Store -let canFoldAsLoad = 1, hasDelaySlot = 1 in -class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>: - FI<op, (outs CPURegs:$dst), (ins mem:$addr), - !strconcat(instr_asm, "\t$dst, $addr"), - [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>; +let canFoldAsLoad = 1 in +class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, + Operand MemOpnd, bit Pseudo>: + FI<op, (outs RC:$rt), (ins MemOpnd:$addr), + !strconcat(instr_asm, "\t$rt, $addr"), + [(set RC:$rt, (OpNode addr:$addr))], IILoad> { + let isPseudo = Pseudo; +} -class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>: - FI<op, (outs), (ins CPURegs:$dst, mem:$addr), - !strconcat(instr_asm, "\t$dst, $addr"), - [(OpNode CPURegs:$dst, addr:$addr)], IIStore>; +class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, + Operand MemOpnd, bit Pseudo>: + FI<op, (outs), (ins RC:$rt, MemOpnd:$addr), + !strconcat(instr_asm, "\t$rt, $addr"), + [(OpNode RC:$rt, addr:$addr)], IIStore> { + let isPseudo = Pseudo; +} + +// 32-bit load. +multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, + bit Pseudo = 0> { + def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, + Requires<[NotN64]>; + def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, + Requires<[IsN64]>; +} + +// 64-bit load. +multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, + bit Pseudo = 0> { + def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, + Requires<[NotN64]>; + def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, + Requires<[IsN64]>; +} + +// 32-bit store. +multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, + bit Pseudo = 0> { + def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, + Requires<[NotN64]>; + def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, + Requires<[IsN64]>; +} + +// 64-bit store. 
+multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, + bit Pseudo = 0> { + def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, + Requires<[NotN64]>; + def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, + Requires<[IsN64]>; +} // Conditional Branch -let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in { -class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>: - FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), - !strconcat(instr_asm, "\t$a, $b, $offset"), - [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], - IIBranch>; +class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: + CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$offset), + !strconcat(instr_asm, "\t$rs, $rt, $offset"), + [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; +} -class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>: - FI<op, (outs), (ins CPURegs:$src, brtarget:$offset), - !strconcat(instr_asm, "\t$src, $offset"), - [(brcond (cond_op CPURegs:$src, 0), bb:$offset)], - IIBranch>; +class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, + RegisterClass RC>: + CBranchBase<op, (outs), (ins RC:$rs, brtarget:$offset), + !strconcat(instr_asm, "\t$rs, $offset"), + [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch> { + let rt = _rt; + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; } // SetCC -class SetCC_R<bits<6> op, bits<6> func, string instr_asm, - PatFrag cond_op>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))], - IIAlu>; +class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op, + RegisterClass RC>: + FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], + IIAlu> { + let shamt = 0; +} -class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, - Operand Od, PatLeaf imm_type>: - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))], +class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, + PatLeaf imm_type, RegisterClass RC>: + FI<op, (outs CPURegs:$rd), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rd, $rs, $i"), + [(set CPURegs:$rd, (cond_op RC:$rs, imm_type:$i))], IIAlu>; // Unconditional branch @@ -310,8 +424,12 @@ class JumpFJ<bits<6> op, string instr_asm>: let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in class JumpFR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPURegs:$target), - !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>; + FR<op, func, (outs), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> { + let rt = 0; + let rd = 0; + let shamt = 0; +} // Jump and Link (Call) let isCall=1, hasDelaySlot=1, @@ -323,76 +441,124 @@ let isCall=1, hasDelaySlot=1, !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], IIBranch>; - let rd=31 in class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>: FR<op, func, (outs), (ins CPURegs:$rs, variable_ops), - !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>; + !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> { + let rt = 0; 
+ let rd = 31; + let shamt = 0; + } class BranchLink<string instr_asm>: FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops), - !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>; + !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch> { + let rt = 0; + } } // Mul, Div -let Defs = [HI, LO] in { - let isCommutable = 1 in - class Mul<bits<6> func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$a, $b"), [], itin>; +class Mul<bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$rs, $rt"), [], itin> { + let rd = 0; + let shamt = 0; + let isCommutable = 1; + let Defs = [HI, LO]; +} - class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$$zero, $a, $b"), - [(op CPURegs:$a, CPURegs:$b)], itin>; +class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$$zero, $rs, $rt"), + [(op CPURegs:$rs, CPURegs:$rt)], itin> { + let rd = 0; + let shamt = 0; + let Defs = [HI, LO]; } // Move from Hi/Lo class MoveFromLOHI<bits<6> func, string instr_asm>: - FR<0x00, func, (outs CPURegs:$dst), (ins), - !strconcat(instr_asm, "\t$dst"), [], IIHiLo>; + FR<0x00, func, (outs CPURegs:$rd), (ins), + !strconcat(instr_asm, "\t$rd"), [], IIHiLo> { + let rs = 0; + let rt = 0; + let shamt = 0; +} class MoveToLOHI<bits<6> func, string instr_asm>: - FR<0x00, func, (outs), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$src"), [], IIHiLo>; + FR<0x00, func, (outs), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rs"), [], IIHiLo> { + let rt = 0; + let rd = 0; + let shamt = 0; +} class EffectiveAddress<string instr_asm> : - FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr), - instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>; + FI<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr), + instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>: - FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>, + FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>, Requires<[HasBitCount]> { let shamt = 0; let rt = rd; } // Sign Extend in Register. 
-class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>: - FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>; +class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>: + FR<0x3f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt), + !strconcat(instr_asm, "\t$rd, $rt"), + [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> { + let rs = 0; + let shamt = sa; + let Predicates = [HasSEInReg]; +} // Byte Swap -class ByteSwap<bits<6> func, string instr_asm>: - FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>; - -// Conditional Move -class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T, - CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"), - [], NoItinerary>; +class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>: + FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt), + !strconcat(instr_asm, "\t$rd, $rt"), + [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> { + let rs = 0; + let shamt = sa; + let Predicates = [HasSwap]; +} // Read Hardware -class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src), - "rdhwr\t$dst, $src", [], IIAlu> { +class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd), + "rdhwr\t$rt, $rd", [], IIAlu> { let rs = 0; let shamt = 0; } +// Ext and Ins +class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins, + list<dag> pattern, InstrItinClass itin>: + FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + pattern, itin>, Requires<[HasMips32r2]> { + bits<5> pos; + bits<5> sz; + let rd = sz; + let shamt = pos; +} + +// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). +class Atomic2Ops<PatFrag Op, string Opstr> : + MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set CPURegs:$dst, + (Op CPURegs:$ptr, CPURegs:$incr))]>; + +// Atomic Compare & Swap. 
+class AtomicCmpSwap<PatFrag Op, string Width> : + MipsPseudo<(outs CPURegs:$dst), + (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap), + !strconcat("atomic_cmp_swap_", Width, + "\t$dst, $ptr, $cmp, $swap"), + [(set CPURegs:$dst, + (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>; + //===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -427,112 +593,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { - def ATOMIC_LOAD_ADD_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_ADD_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_ADD_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_SUB_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_SUB_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_SUB_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_AND_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_AND_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_AND_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_OR_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_OR_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_OR_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_XOR_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_XOR_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins 
CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_XOR_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_NAND_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_NAND_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_NAND_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_SWAP_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_8\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>; - def ATOMIC_SWAP_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_16\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>; - def ATOMIC_SWAP_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_32\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>; - - def ATOMIC_CMP_SWAP_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; - def ATOMIC_CMP_SWAP_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; - def ATOMIC_CMP_SWAP_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">; + def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">; + def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">; + def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">; + def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">; + def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">; + def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">; + def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">; + def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">; + def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">; + def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">; + def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">; + def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">; + def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">; + def ATOMIC_LOAD_NAND_I8 : 
Atomic2Ops<atomic_load_nand_8, "load_nand_8">; + def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">; + def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">; + + def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">; + def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">; + def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">; + + def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">; + def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">; + def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">; } //===----------------------------------------------------------------------===// @@ -544,26 +630,26 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>; -def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>; -def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>; -def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>; -def ANDi : LogicI<0x0c, "andi", and>; -def ORi : LogicI<0x0d, "ori", or>; -def XORi : LogicI<0x0e, "xori", xor>; +def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>; +def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>; +def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>; +def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; +def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>; +def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>; +def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>; def LUi : LoadUpper<0x0f, "lui">; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>; -def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>; -def ADD : ArithOverflowR<0x00, 0x20, "add", 1>; -def SUB : ArithOverflowR<0x00, 0x22, "sub">; -def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>; -def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>; -def AND : LogicR<0x24, "and", and>; -def OR : LogicR<0x25, "or", or>; -def XOR : LogicR<0x26, "xor", xor>; -def NOR : LogicNOR<0x00, 0x27, "nor">; +def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>; +def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>; +def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>; +def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>; +def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>; +def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>; +def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>; +def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>; +def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>; +def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>; /// Shift Instructions def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>; @@ -574,45 +660,58 @@ def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>; def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>; // Rotate Instructions -let Predicates = [IsMips32r2] in { +let Predicates = [HasMips32r2] in { def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>; def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>; } /// Load and Store Instructions -def LB : LoadM<0x20, "lb", sextloadi8>; -def LBu : LoadM<0x24, "lbu", zextloadi8>; -def LH : LoadM<0x21, "lh", sextloadi16>; -def LHu : LoadM<0x25, "lhu", zextloadi16>; -def LW : LoadM<0x23, 
"lw", load>; -def SB : StoreM<0x28, "sb", truncstorei8>; -def SH : StoreM<0x29, "sh", truncstorei16>; -def SW : StoreM<0x2b, "sw", store>; +/// aligned +defm LB : LoadM32<0x20, "lb", sextloadi8>; +defm LBu : LoadM32<0x24, "lbu", zextloadi8>; +defm LH : LoadM32<0x21, "lh", sextloadi16_a>; +defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>; +defm LW : LoadM32<0x23, "lw", load_a>; +defm SB : StoreM32<0x28, "sb", truncstorei8>; +defm SH : StoreM32<0x29, "sh", truncstorei16_a>; +defm SW : StoreM32<0x2b, "sw", store_a>; + +/// unaligned +defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>; +defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>; +defm ULW : LoadM32<0x23, "ulw", load_u, 1>; +defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; +defm USW : StoreM32<0x2b, "usw", store_u, 1>; + +let hasSideEffects = 1 in +def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary> +{ + let opcode = 0; + let Inst{25-11} = 0; + let Inst{5-0} = 15; +} /// Load-linked, Store-conditional -let hasDelaySlot = 1 in +let mayLoad = 1 in def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr), "ll\t$dst, $addr", [], IILoad>; -let Constraints = "$src = $dst" in +let mayStore = 1, Constraints = "$src = $dst" in def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr), "sc\t$src, $addr", [], IIStore>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -def JR : JumpFR<0x00, 0x08, "jr">; +let isIndirectBranch = 1 in + def JR : JumpFR<0x00, 0x08, "jr">; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; -def BEQ : CBranch<0x04, "beq", seteq>; -def BNE : CBranch<0x05, "bne", setne>; - -let rt=1 in - def BGEZ : CBranchZero<0x01, "bgez", setge>; - -let rt=0 in { - def BGTZ : CBranchZero<0x07, "bgtz", setgt>; - def BLEZ : CBranchZero<0x07, "blez", setle>; - def BLTZ : CBranchZero<0x01, "bltz", setlt>; -} +def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; +def BNE : CBranch<0x05, "bne", setne, CPURegs>; +def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>; +def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>; +def BLEZ : CBranchZero<0x07, 0, "blez", setle, CPURegs>; +def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>; def BGEZAL : BranchLink<"bgezal">; def BLTZAL : BranchLink<"bltzal">; @@ -639,40 +738,31 @@ let Uses = [LO] in def MFLO : MoveFromLOHI<0x12, "mflo">; /// Sign Ext In Register Instructions. -let Predicates = [HasSEInReg] in { - let shamt = 0x10, rs = 0 in - def SEB : SignExtInReg<0x21, "seb", i8>; - - let shamt = 0x18, rs = 0 in - def SEH : SignExtInReg<0x20, "seh", i16>; -} +def SEB : SignExtInReg<0x10, "seb", i8>; +def SEH : SignExtInReg<0x18, "seh", i16>; /// Count Leading -def CLZ : CountLeading<0b100000, "clz", - [(set CPURegs:$dst, (ctlz CPURegs:$src))]>; -def CLO : CountLeading<0b100001, "clo", - [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>; +def CLZ : CountLeading<0x20, "clz", + [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>; +def CLO : CountLeading<0x21, "clo", + [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>; /// Byte Swap -let Predicates = [HasSwap] in { - let shamt = 0x3, rs = 0 in - def WSBW : ByteSwap<0x20, "wsbw">; -} - -/// Conditional Move -def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; -def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; +def WSBW : ByteSwap<0x20, 0x2, "wsbw">; // Conditional moves: // These instructions are expanded in // MipsISelLowering::EmitInstrWithCustomInserter if target does not have // conditional move instructions. 
// flag:int, data:int -let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in - class CondMovIntInt<bits<6> funct, string instr_asm> : - FR<0, funct, (outs CPURegs:$dst), - (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F), - !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>; +class CondMovIntInt<bits<6> funct, string instr_asm> : + FR<0, funct, (outs CPURegs:$rd), + (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> { + let shamt = 0; + let usesCustomInserter = 1; + let Constraints = "$F = $rd"; +} def MOVZ_I : CondMovIntInt<0x0a, "movz">; def MOVN_I : CondMovIntInt<0x0b, "movn">; @@ -685,13 +775,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">; +def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">; +def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; @@ -701,10 +791,25 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>; // MUL is a assembly macro in the current used ISAs. In recent ISA's // it is a real instruction. -def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>; +def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>, + Requires<[HasMips32]>; def RDHWR : ReadHardware; +def EXT : ExtIns<0, "ext", (outs CPURegs:$rt), + (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz), + [(set CPURegs:$rt, + (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))], + NoItinerary>; + +let Constraints = "$src = $rt" in +def INS : ExtIns<4, "ins", (outs CPURegs:$rt), + (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz, CPURegs:$src), + [(set CPURegs:$rt, + (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz, + CPURegs:$src))], + NoItinerary>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -738,16 +843,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; +def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), (ADDiu CPURegs:$hi, tblockaddress:$lo)>; def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), (ADDiu CPURegs:$hi, tjumptable:$lo)>; def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; @@ -763,6 +872,7 @@ def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)), // tprel hi/lo def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi 
tglobaltlsaddr:$in)>; +def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)), (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; @@ -784,60 +894,67 @@ def : Pat<(not CPURegs:$in), // extended load and stores def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>; def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>; -def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>; +def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>; +def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>; // peepholes def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; // brcond patterns -def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst), - (BNE CPURegs:$lhs, ZERO, bb:$dst)>; -def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst), - (BEQ CPURegs:$lhs, ZERO, bb:$dst)>; - -def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst), - (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst), - (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; - -def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>; - -def : Pat<(brcond CPURegs:$cond, bb:$dst), - (BNE CPURegs:$cond, ZERO, bb:$dst)>; +multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp, + Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp, + Instruction SLTiuOp, Register ZEROReg> { +def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; + +def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; + +def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; + +def : Pat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +} + +defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; // select patterns multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> { - def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F), 
(MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>; - def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F), + def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>; - def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; - def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; - def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F), + def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F), (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>; } multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> { - def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; def : Pat<(select CPURegs:$cond, RC:$T, RC:$F), (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>; - def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F), + def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F), (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>; } @@ -845,30 +962,48 @@ defm : MovzPats<CPURegs, MOVZ_I>; defm : MovnPats<CPURegs, MOVN_I>; // setcc patterns -def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs), - (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>; -def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs), - (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>; - -def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>; -def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>; - -def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs), - (SLT CPURegs:$rhs, CPURegs:$lhs)>; -def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs), - (SLTu CPURegs:$rhs, CPURegs:$lhs)>; - -def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>; -def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>; - -def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs), - (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>; -def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs), - (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>; +multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp, + Instruction SLTuOp, Register ZEROReg> { + def : Pat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : Pat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; +} + +multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setle RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : Pat<(setule RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; +} + +multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : Pat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; +} + +multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setge RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$lhs, 
RC:$rhs), 1)>; + def : Pat<(setuge RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; +} + +multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp, + Instruction SLTiuOp> { + def : Pat<(setge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; + def : Pat<(setuge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; +} + +defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>; +defm : SetlePats<CPURegs, SLT, SLTu>; +defm : SetgtPats<CPURegs, SLT, SLTu>; +defm : SetgePats<CPURegs, SLT, SLTu>; +defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // select MipsDynAlloc def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; @@ -878,4 +1013,5 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; //===----------------------------------------------------------------------===// include "MipsInstrFPU.td" +include "Mips64InstrInfo.td" diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp new file mode 100644 index 0000000..28c2b48 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp @@ -0,0 +1,230 @@ +//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Mips target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "MipsJITInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Memory.h" +#include <cstdlib> +using namespace llvm; + + +void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. This code saves registers, calls +// MipsCompilationCallbackC and restores registers. +extern "C" { +#if defined (__mips__) +void MipsCompilationCallback(); + + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "MipsCompilationCallback\n" + ASMPREFIX "MipsCompilationCallback:\n" + ".ent " ASMPREFIX "MipsCompilationCallback\n" + ".frame $29, 32, $31\n" + ".set noreorder\n" + ".cpload $t9\n" + + "addiu $sp, $sp, -60\n" + ".cprestore 16\n" + + // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain + // stuff for the real target function right now. We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned. 
We also need to save the ra register since it contains the + // original return address, and t8 register since it contains the address + // of the end of function stub. + "sw $a0, 20($sp)\n" + "sw $a1, 24($sp)\n" + "sw $a2, 28($sp)\n" + "sw $a3, 32($sp)\n" + "sw $ra, 36($sp)\n" + "sw $t8, 40($sp)\n" + "sdc1 $f12, 44($sp)\n" + "sdc1 $f14, 52($sp)\n" + + // t8 points at the end of function stub. Pass the beginning of the stub + // to the MipsCompilationCallbackC. + "addiu $a0, $t8, -16\n" + "jal " ASMPREFIX "MipsCompilationCallbackC\n" + "nop\n" + + // Restore registers. + "lw $a0, 20($sp)\n" + "lw $a1, 24($sp)\n" + "lw $a2, 28($sp)\n" + "lw $a3, 32($sp)\n" + "lw $ra, 36($sp)\n" + "lw $t8, 40($sp)\n" + "ldc1 $f12, 44($sp)\n" + "ldc1 $f14, 52($sp)\n" + "addiu $sp, $sp, 60\n" + + // Jump to the (newly modified) stub to invoke the real function. + "addiu $t8, $t8, -16\n" + "jr $t8\n" + "nop\n" + + ".set reorder\n" + ".end " ASMPREFIX "MipsCompilationCallback\n" + ); +#else // host != Mips + void MipsCompilationCallback() { + llvm_unreachable( + "Cannot call MipsCompilationCallback() on a non-Mips arch!"); + } +#endif +} + +/// MipsCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. We're replacing the first four instructions of the + // stub with code that jumps to the compiled function: + // lui $t9, %hi(NewVal) + // addiu $t9, $t9, %lo(NewVal) + // jr $t9 + // nop + + int Hi = ((unsigned)NewVal & 0xffff0000) >> 16; + if ((NewVal & 0x8000) != 0) + Hi++; + int Lo = (int)(NewVal & 0xffff); + + *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi; + *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo; + *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8; + *(intptr_t *)(StubAddr + 12) = 0; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16); +} + +TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return MipsCompilationCallback; +} + +TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { + // The stub contains 4 4-byte instructions, aligned at 4 bytes. See + // emitFunctionStub for details. 
+ StubLayout Result = { 4*4, 4 }; + return Result; +} + +void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + JCE.emitAlignment(4); + void *Addr = (void*) (JCE.getCurrentPCValue()); + if (!sys::Memory::setRangeWritable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + intptr_t EmittedAddr; + if (Fn != (void*)(intptr_t)MipsCompilationCallback) + EmittedAddr = (intptr_t)Fn; + else + EmittedAddr = (intptr_t)MipsCompilationCallback; + + + int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16; + if ((EmittedAddr & 0x8000) != 0) + Hi++; + int Lo = (int)(EmittedAddr & 0xffff); + + // lui t9, %hi(EmittedAddr) + // addiu t9, t9, %lo(EmittedAddr) + // jalr t8, t9 + // nop + JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordLE(25 << 21 | 24 << 11 | 9); + JCE.emitWordLE(0); + + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((Mips::RelocationType) MR->getRelocationType()) { + case Mips::reloc_mips_branch: + ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_26: + ResultPtr = (ResultPtr & 0x0fffffff) >> 2; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_hi: + ResultPtr = ResultPtr >> 16; + if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) { + ResultPtr += 1; + } + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_lo: + ResultPtr = ResultPtr & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + default: + llvm_unreachable("ERROR: Unknown Mips relocation."); + } + } +} diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.h b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h new file mode 100644 index 0000000..41f32a3 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h @@ -0,0 +1,70 @@ +//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsJITInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSJITINFO_H
+#define MIPSJITINFO_H
+
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class MipsTargetMachine;
+
+class MipsJITInfo : public TargetJITInfo {
+
+  bool IsPIC;
+
+  public:
+    explicit MipsJITInfo() :
+      IsPIC(false) {}
+
+    /// replaceMachineCodeForFunction - Make it so that calling the function
+    /// whose machine code is at OLD turns into a call to NEW, perhaps by
+    /// overwriting OLD with a branch to NEW. This is used for self-modifying
+    /// code.
+    ///
+    virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+    // getStubLayout - Returns the size and alignment of the largest call stub
+    // on Mips.
+    virtual StubLayout getStubLayout();
+
+    /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+    /// small native function that simply calls the function at the specified
+    /// address.
+    virtual void *emitFunctionStub(const Function* F, void *Fn,
+                                   JITCodeEmitter &JCE);
+
+    /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+    virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+    /// relocate - Before the JIT can run a block of code that has been emitted,
+    /// it must rewrite the code to contain the actual addresses of any
+    /// referenced global symbols.
+    virtual void relocate(void *Function, MachineRelocation *MR,
+                          unsigned NumRelocs, unsigned char* GOTBase);
+
+    /// Initialize - Initialize internal state for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) { + IsPIC = isPIC; + } + +}; +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp index f5cc3aa..608a7d2 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -29,10 +29,10 @@ MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {} MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MachineOperandType MOTy) const { + MachineOperandType MOTy, + unsigned Offset) const { MipsMCSymbolRefExpr::VariantKind Kind; const MCSymbol *Symbol; - int Offset = 0; switch(MO.getTargetFlags()) { default: assert(0 && "Invalid target flag!"); @@ -46,6 +46,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break; case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break; case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break; + case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break; + case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break; + case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break; + case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break; + case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break; } switch (MOTy) { @@ -72,7 +77,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); if (MO.getOffset()) - Offset = MO.getOffset(); + Offset += MO.getOffset(); break; default: @@ -83,36 +88,39 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, Ctx)); } +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: + assert(0 && "unknown operand type"); + break; + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) break; + return MCOperand::CreateReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::CreateImm(MO.getImm()); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, 0); + } + + return MCOperand(); +} + void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - MCOperand MCOp; - MachineOperandType MOTy = MO.getType(); + MCOperand MCOp = LowerOperand(MO); - switch (MOTy) { - default: - MI->dump(); - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. 
- if (MO.isImplicit()) continue; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, MOTy); - break; - } - - OutMI.addOperand(MCOp); + if (MCOp.isValid()) + OutMI.addOperand(MCOp); } } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h index ec5201b..223f23a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h @@ -1,4 +1,4 @@ -//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===// +//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==// // // The LLVM Compiler Infrastructure // @@ -36,7 +36,8 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, - MachineOperandType MOTy) const; + MachineOperandType MOTy, unsigned Offset) const; + MCOperand LowerOperand(const MachineOperand& MO) const; }; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp index 9a2bdae..a0a242c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp @@ -33,6 +33,11 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { case VK_Mips_GOTTPREL: OS << "%gottprel("; break; case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; + case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; + case VK_Mips_GOT_DISP: OS << "%got_disp("; break; + case VK_Mips_GOT_PAGE: OS << "%got_page("; break; + case VK_Mips_GOT_OFST: OS << "%got_ofst("; break; } OS << *Symbol; @@ -43,7 +48,9 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { OS << Offset; } - if (Kind != VK_Mips_None) + if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO) + OS << ")))"; + else if (Kind != VK_Mips_None) OS << ')'; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h index 3e69596..55e85a7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h +++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h @@ -25,7 +25,12 @@ public: VK_Mips_TLSGD, VK_Mips_GOTTPREL, VK_Mips_TPREL_HI, - VK_Mips_TPREL_LO + VK_Mips_TPREL_LO, + VK_Mips_GPOFF_HI, + VK_Mips_GPOFF_LO, + VK_Mips_GOT_DISP, + VK_Mips_GOT_PAGE, + VK_Mips_GOT_OFST }; private: diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h index dbb7a67..bc30b6b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -51,16 +51,12 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; - /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap - /// intrinsics, it is necessary to use a temporary stack location. - /// This field holds the frame index of this location. 
- int AtomicFrameIndex; public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), AtomicFrameIndex(-1) + MaxCallFrameSize(0) {} bool isInArgFI(int FI) const { @@ -104,9 +100,6 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } - - int getAtomicFrameIndex() const { return AtomicFrameIndex; } - void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 24390da..f8c0fda 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@ using namespace llvm; MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) - : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {} + : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). @@ -52,39 +51,87 @@ unsigned MipsRegisterInfo:: getRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { - case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0; - case Mips::AT : case Mips::F1 : return 1; - case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2; - case Mips::V1 : case Mips::F3 : return 3; - case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4; - case Mips::A1 : case Mips::F5 : return 5; - case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6; - case Mips::A3 : case Mips::F7 : return 7; - case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8; - case Mips::T1 : case Mips::F9 : return 9; - case Mips::T2 : case Mips::F10: case Mips::D5: return 10; - case Mips::T3 : case Mips::F11: return 11; - case Mips::T4 : case Mips::F12: case Mips::D6: return 12; - case Mips::T5 : case Mips::F13: return 13; - case Mips::T6 : case Mips::F14: case Mips::D7: return 14; - case Mips::T7 : case Mips::F15: return 15; - case Mips::S0 : case Mips::F16: case Mips::D8: return 16; - case Mips::S1 : case Mips::F17: return 17; - case Mips::S2 : case Mips::F18: case Mips::D9: return 18; - case Mips::S3 : case Mips::F19: return 19; - case Mips::S4 : case Mips::F20: case Mips::D10: return 20; - case Mips::S5 : case Mips::F21: return 21; - case Mips::S6 : case Mips::F22: case Mips::D11: return 22; - case Mips::S7 : case Mips::F23: return 23; - case Mips::T8 : case Mips::F24: case Mips::D12: return 24; - case Mips::T9 : case Mips::F25: return 25; - case Mips::K0 : case Mips::F26: case Mips::D13: return 26; - case Mips::K1 : case Mips::F27: return 27; - case Mips::GP : case Mips::F28: case Mips::D14: return 28; - case Mips::SP : case Mips::F29: return 29; - case Mips::FP : case Mips::F30: case Mips::D15: return 30; - case Mips::RA : case Mips::F31: return 31; - default: llvm_unreachable("Unknown register number!"); + case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: + case Mips::D0: + 
return 0; + case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + return 1; + case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: + case Mips::D1: + return 2; + case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + return 3; + case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: + case Mips::D2: + return 4; + case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64: + return 5; + case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64: + case Mips::D3: + return 6; + case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64: + return 7; + case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64: + case Mips::D4: + return 8; + case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64: + return 9; + case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64: + case Mips::D5: + return 10; + case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64: + return 11; + case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64: + case Mips::D6: + return 12; + case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64: + return 13; + case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64: + case Mips::D7: + return 14; + case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64: + return 15; + case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64: + case Mips::D8: + return 16; + case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64: + return 17; + case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64: + case Mips::D9: + return 18; + case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64: + return 19; + case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64: + case Mips::D10: + return 20; + case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64: + return 21; + case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64: + case Mips::D11: + return 22; + case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64: + return 23; + case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64: + case Mips::D12: + return 24; + case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64: + return 25; + case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64: + case Mips::D13: + return 26; + case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64: + return 27; + case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64: + case Mips::D14: + return 28; + case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + return 29; + case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: + case Mips::D15: + return 30; + case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: + return 31; + default: llvm_unreachable("Unknown register number!"); } return 0; // Not reached } @@ -101,7 +148,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const { // Mips callee-save register range is $16-$23, $f20-$f30 static const unsigned SingleFloatOnlyCalleeSavedRegs[] = { - Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26, + Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26, Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20, Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4, Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 @@ -113,31 +160,71 @@ getCalleeSavedRegs(const MachineFunction *MF) const Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 }; + static const unsigned N32CalleeSavedRegs[] = { 
+ Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64, + Mips::D21_64, + Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, + Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, + Mips::S0_64, 0 + }; + + static const unsigned N64CalleeSavedRegs[] = { + Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64, + Mips::D26_64, Mips::D25_64, Mips::D24_64, + Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, + Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, + Mips::S0_64, 0 + }; + if (Subtarget.isSingleFloat()) return SingleFloatOnlyCalleeSavedRegs; - else + else if (!Subtarget.hasMips64()) return Mips32CalleeSavedRegs; + else if (Subtarget.isABI_N32()) + return N32CalleeSavedRegs; + + assert(Subtarget.isABI_N64()); + return N64CalleeSavedRegs; } BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { + static const unsigned ReservedCPURegs[] = { + Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, + Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0 + }; + + static const unsigned ReservedCPU64Regs[] = { + Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, + Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0 + }; + BitVector Reserved(getNumRegs()); - Reserved.set(Mips::ZERO); - Reserved.set(Mips::AT); - Reserved.set(Mips::K0); - Reserved.set(Mips::K1); - Reserved.set(Mips::GP); - Reserved.set(Mips::SP); - Reserved.set(Mips::FP); - Reserved.set(Mips::RA); - Reserved.set(Mips::F31); - Reserved.set(Mips::D15); - - // SRV4 requires that odd register can't be used. - if (!Subtarget.isSingleFloat() && !Subtarget.isMips32()) - for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2) - Reserved.set(FReg); + typedef TargetRegisterClass::iterator RegIter; + + for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg) + Reserved.set(*Reg); + if (Subtarget.hasMips64()) { + for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg) + Reserved.set(*Reg); + + // Reserve all registers in AFGR64. + for (RegIter Reg = Mips::AFGR64RegisterClass->begin(); + Reg != Mips::AFGR64RegisterClass->end(); ++Reg) + Reserved.set(*Reg); + } + else { + // Reserve all registers in CPU64Regs & FGR64. 
+ for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin(); + Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg) + Reserved.set(*Reg); + + for (RegIter Reg = Mips::FGR64RegisterClass->begin(); + Reg != Mips::FGR64RegisterClass->end(); ++Reg) + Reserved.set(*Reg); + } + return Reserved; } @@ -245,11 +332,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } unsigned MipsRegisterInfo:: -getRARegister() const { - return Mips::RA; -} - -unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -267,12 +349,3 @@ getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int MipsRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { - return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h index 646369b..67e57dd 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h @@ -57,15 +57,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; /// Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td index f0db518..925ad9e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td @@ -10,6 +10,11 @@ //===----------------------------------------------------------------------===// // Declarations that describe the MIPS register file //===----------------------------------------------------------------------===// +let Namespace = "Mips" in { +def sub_fpeven : SubRegIndex; +def sub_fpodd : SubRegIndex; +def sub_32 : SubRegIndex; +} // We have banks of 32 registers each. 
class MipsReg<string n> : Register<n> { @@ -28,22 +33,31 @@ class MipsGPRReg<bits<5> num, string n> : MipsReg<n> { let Num = num; } +// Mips 64-bit CPU Registers +class Mips64GPRReg<bits<5> num, string n, list<Register> subregs> + : MipsRegWithSubRegs<n, subregs> { + let Num = num; + let SubRegIndices = [sub_32]; +} + // Mips 32-bit FPU Registers class FPR<bits<5> num, string n> : MipsReg<n> { let Num = num; } // Mips 64-bit (aliased) FPU Registers -let Namespace = "Mips" in { -def sub_fpeven : SubRegIndex; -def sub_fpodd : SubRegIndex; -} class AFPR<bits<5> num, string n, list<Register> subregs> : MipsRegWithSubRegs<n, subregs> { let Num = num; let SubRegIndices = [sub_fpeven, sub_fpodd]; } +class AFPR64<bits<5> num, string n, list<Register> subregs> + : MipsRegWithSubRegs<n, subregs> { + let Num = num; + let SubRegIndices = [sub_32]; +} + // Mips Hardware Registers class HWR<bits<5> num, string n> : MipsReg<n> { let Num = num; @@ -54,6 +68,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> { //===----------------------------------------------------------------------===// let Namespace = "Mips" in { + // FIXME: Fix DwarfRegNum. // General Purpose Registers def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>; @@ -89,6 +104,40 @@ let Namespace = "Mips" in { def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>; def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>; + // General Purpose 64-bit Registers + def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>; + def AT_64 : Mips64GPRReg< 1, "AT", [AT]>; + def V0_64 : Mips64GPRReg< 2, "2", [V0]>; + def V1_64 : Mips64GPRReg< 3, "3", [V1]>; + def A0_64 : Mips64GPRReg< 4, "4", [A0]>; + def A1_64 : Mips64GPRReg< 5, "5", [A1]>; + def A2_64 : Mips64GPRReg< 6, "6", [A2]>; + def A3_64 : Mips64GPRReg< 7, "7", [A3]>; + def T0_64 : Mips64GPRReg< 8, "8", [T0]>; + def T1_64 : Mips64GPRReg< 9, "9", [T1]>; + def T2_64 : Mips64GPRReg< 10, "10", [T2]>; + def T3_64 : Mips64GPRReg< 11, "11", [T3]>; + def T4_64 : Mips64GPRReg< 12, "12", [T4]>; + def T5_64 : Mips64GPRReg< 13, "13", [T5]>; + def T6_64 : Mips64GPRReg< 14, "14", [T6]>; + def T7_64 : Mips64GPRReg< 15, "15", [T7]>; + def S0_64 : Mips64GPRReg< 16, "16", [S0]>; + def S1_64 : Mips64GPRReg< 17, "17", [S1]>; + def S2_64 : Mips64GPRReg< 18, "18", [S2]>; + def S3_64 : Mips64GPRReg< 19, "19", [S3]>; + def S4_64 : Mips64GPRReg< 20, "20", [S4]>; + def S5_64 : Mips64GPRReg< 21, "21", [S5]>; + def S6_64 : Mips64GPRReg< 22, "22", [S6]>; + def S7_64 : Mips64GPRReg< 23, "23", [S7]>; + def T8_64 : Mips64GPRReg< 24, "24", [T8]>; + def T9_64 : Mips64GPRReg< 25, "25", [T9]>; + def K0_64 : Mips64GPRReg< 26, "26", [K0]>; + def K1_64 : Mips64GPRReg< 27, "27", [K1]>; + def GP_64 : Mips64GPRReg< 28, "GP", [GP]>; + def SP_64 : Mips64GPRReg< 29, "SP", [SP]>; + def FP_64 : Mips64GPRReg< 30, "FP", [FP]>; + def RA_64 : Mips64GPRReg< 31, "RA", [RA]>; + /// Mips Single point precision FPU Registers def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>; def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>; @@ -142,10 +191,49 @@ let Namespace = "Mips" in { def D14 : AFPR<28, "F28", [F28, F29]>; def D15 : AFPR<30, "F30", [F30, F31]>; + /// Mips Double point precision FPU Registers in MFP64 mode. 
+ def D0_64 : AFPR64<0, "F0", [F0]>; + def D1_64 : AFPR64<1, "F1", [F1]>; + def D2_64 : AFPR64<2, "F2", [F2]>; + def D3_64 : AFPR64<3, "F3", [F3]>; + def D4_64 : AFPR64<4, "F4", [F4]>; + def D5_64 : AFPR64<5, "F5", [F5]>; + def D6_64 : AFPR64<6, "F6", [F6]>; + def D7_64 : AFPR64<7, "F7", [F7]>; + def D8_64 : AFPR64<8, "F8", [F8]>; + def D9_64 : AFPR64<9, "F9", [F9]>; + def D10_64 : AFPR64<10, "F10", [F10]>; + def D11_64 : AFPR64<11, "F11", [F11]>; + def D12_64 : AFPR64<12, "F12", [F12]>; + def D13_64 : AFPR64<13, "F13", [F13]>; + def D14_64 : AFPR64<14, "F14", [F14]>; + def D15_64 : AFPR64<15, "F15", [F15]>; + def D16_64 : AFPR64<16, "F16", [F16]>; + def D17_64 : AFPR64<17, "F17", [F17]>; + def D18_64 : AFPR64<18, "F18", [F18]>; + def D19_64 : AFPR64<19, "F19", [F19]>; + def D20_64 : AFPR64<20, "F20", [F20]>; + def D21_64 : AFPR64<21, "F21", [F21]>; + def D22_64 : AFPR64<22, "F22", [F22]>; + def D23_64 : AFPR64<23, "F23", [F23]>; + def D24_64 : AFPR64<24, "F24", [F24]>; + def D25_64 : AFPR64<25, "F25", [F25]>; + def D26_64 : AFPR64<26, "F26", [F26]>; + def D27_64 : AFPR64<27, "F27", [F27]>; + def D28_64 : AFPR64<28, "F28", [F28]>; + def D29_64 : AFPR64<29, "F29", [F29]>; + def D30_64 : AFPR64<30, "F30", [F30]>; + def D31_64 : AFPR64<31, "F31", [F31]>; + // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; def LO : Register<"lo">, DwarfRegNum<[65]>; + let SubRegIndices = [sub_32] in { + def HI64 : RegisterWithSubRegs<"hi", [HI]>; + def LO64 : RegisterWithSubRegs<"lo", [LO]>; + } + // Status flags register def FCR31 : Register<"31">; @@ -167,6 +255,18 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, (add // Reserved ZERO, AT, K0, K1, GP, SP, FP, RA)>; +def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add + // Return Values and Arguments + V0_64, V1_64, A0_64, A1_64, A2_64, A3_64, + // Not preserved across procedure calls + T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64, + // Callee save + S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, + // Reserved + ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> { + let SubRegClasses = [(CPURegs sub_32)]; +} + // 64bit fp: // * FGR64 - 32 64-bit registers // * AFGR64 - 16 32-bit even registers (32-bit FP Mode) @@ -182,17 +282,22 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Not preserved across procedure calls D2, D3, D4, D5, D8, D9, // Callee save - D10, D11, D12, D13, D14, - // Reserved - D15)> { + D10, D11, D12, D13, D14, D15)> { let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; } +def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> { + let SubRegClasses = [(FGR32 sub_32)]; +} + // Condition Register for floating point operations def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>; // Hi/Lo Registers def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> { + let SubRegClasses = [(HILO sub_32)]; +} // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; diff --git a/contrib/llvm/lib/Target/Mips/MipsRelocations.h b/contrib/llvm/lib/Target/Mips/MipsRelocations.h new file mode 100644 index 0000000..66d1bfd --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsRelocations.h @@ -0,0 +1,41 @@ +//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===---------------------------------------------------------------------===// +// +// This file defines the Mips target-specific relocation types +// (for relocation-model=static). +// +//===---------------------------------------------------------------------===// + +#ifndef MIPSRELOCATIONS_H_ +#define MIPSRELOCATIONS_H_ + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace Mips{ + enum RelocationType { + // reloc_mips_branch - pc relative relocation for branches. The lower 18 + // bits of the difference between the branch target and the branch + // instruction, shifted right by 2. + reloc_mips_branch = 1, + + // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the + // lower 16 bits of the address is negative). + reloc_mips_hi = 2, + + // reloc_mips_lo - lower 16 bits of the address. + reloc_mips_lo = 3, + + // reloc_mips_26 - lower 28 bits of the address, shifted right by 2. + reloc_mips_26 = 4 + }; + } +} + +#endif /* MIPSRELOCATIONS_H_ */ diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index 6eee333..016d449 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -13,7 +13,7 @@ #include "MipsSubtarget.h" #include "Mips.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -24,15 +24,14 @@ using namespace llvm; MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little) : MipsGenSubtargetInfo(TT, CPU, FS), - MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), - HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), - HasSwap(false), HasBitCount(false) + MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), + IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), + IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), + HasMinMax(false), HasSwap(false), HasBitCount(false) { std::string CPUName = CPU; if (CPUName.empty()) - CPUName = "mips1"; - MipsArchVersion = Mips1; + CPUName = "mips32r1"; // Parse features string. ParseSubtargetFeatures(CPUName, FS); @@ -40,23 +39,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Set MipsABI if it hasn't been set yet. + if (MipsABI == UnknownABI) + MipsABI = hasMips64() ? N64 : O32; + + // Check if Architecture and ABI are compatible. + assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) || + (hasMips64() && (isABI_N32() || isABI_N64()))) && + "Invalid Arch & ABI pair."); + // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; - - // When only the target triple is specified and is - // a allegrex target, set the features. 
We also match - // big and little endian allegrex cores (dont really - // know if a big one exists) - if (TT.find("mipsallegrex") != std::string::npos || - TT.find("psp") != std::string::npos) { - MipsABI = EABI; - IsSingleFloat = true; - MipsArchVersion = Mips2; - HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet) - HasSEInReg = true; - HasBitCount = true; - HasSwap = true; - HasCondMov = true; - } } diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 533d4af..d9dddad 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -27,14 +27,15 @@ class StringRef; class MipsSubtarget : public MipsGenSubtargetInfo { public: + // NOTE: O64 will not be supported. enum MipsABIEnum { - O32, O64, N32, N64, EABI + UnknownABI, O32, N32, N64, EABI }; protected: enum MipsArchEnum { - Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 + Mips32, Mips32r2, Mips64, Mips64r2 }; // Mips architecture version @@ -90,6 +91,8 @@ public: /// Only O32 and EABI supported right now. bool isABI_EABI() const { return MipsABI == EABI; } + bool isABI_N64() const { return MipsABI == N64; } + bool isABI_N32() const { return MipsABI == N32; } bool isABI_O32() const { return MipsABI == O32; } unsigned getTargetABI() const { return MipsABI; } @@ -102,9 +105,11 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool isMips1() const { return MipsArchVersion == Mips1; } - bool isMips32() const { return MipsArchVersion >= Mips32; } - bool isMips32r2() const { return MipsArchVersion == Mips32r2; } + bool hasMips32() const { return MipsArchVersion >= Mips32; } + bool hasMips32r2() const { return MipsArchVersion == Mips32r2 || + MipsArchVersion == Mips64r2; } + bool hasMips64() const { return MipsArchVersion >= Mips64; } + bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 20b9f4e..6480da3 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -14,13 +14,15 @@ #include "Mips.h" #include "MipsTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeMipsTarget() { // Register the target. - RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget); + RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); + RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target); + RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -31,30 +33,47 @@ extern "C" void LLVMInitializeMipsTarget() { // an easier handling. // Using CodeModel::Large enables different CALL behavior. 
MipsTargetMachine:: -MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool isLittle=false): - LLVMTargetMachine(T, TT, CPU, FS), +MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool isLittle): + LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, isLittle), - DataLayout(isLittle ? - std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : - std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), + DataLayout(isLittle ? + (Subtarget.isABI_N64() ? + "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : + (Subtarget.isABI_N64() ? + "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this) { - // Abicall enables PIC by default - if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isABI_O32()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::Static); - } + TLInfo(*this), TSInfo(*this), JITInfo() { } +MipsebTargetMachine:: +MipsebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + MipselTargetMachine:: -MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) : - MipsTargetMachine(T, TT, CPU, FS, true) {} +MipselTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + +Mips64ebTargetMachine:: +Mips64ebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + +Mips64elTargetMachine:: +Mips64elTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. @@ -77,7 +96,10 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) bool MipsTargetMachine:: addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - PM.add(createMipsEmitGPRestorePass(*this)); + // Do not restore $gp if target is Mips64. + // In N32/64, $gp is a callee-saved register. + if (!Subtarget.hasMips64()) + PM.add(createMipsEmitGPRestorePass(*this)); return true; } @@ -86,3 +108,12 @@ addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createMipsExpandPseudoPass(*this)); return true; } + +bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Mips. 
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE)); + return false; +} + diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h index a021af2..118ed10 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -22,6 +22,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" +#include "MipsJITInfo.h" namespace llvm { class formatted_raw_ostream; @@ -33,9 +34,12 @@ namespace llvm { MipsFrameLowering FrameLowering; MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + public: - MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const @@ -46,6 +50,9 @@ namespace llvm { { return &Subtarget; } virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + virtual const MipsRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -67,16 +74,47 @@ namespace llvm { virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); + virtual bool addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE); + }; -/// MipselTargetMachine - Mipsel target machine. +/// MipsebTargetMachine - Mips32 big endian target machine. +/// +class MipsebTargetMachine : public MipsTargetMachine { +public: + MipsebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +/// MipselTargetMachine - Mips32 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { public: - MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MipselTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; +/// Mips64ebTargetMachine - Mips64 big endian target machine. +/// +class Mips64ebTargetMachine : public MipsTargetMachine { +public: + Mips64ebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +/// Mips64elTargetMachine - Mips64 little endian target machine. 
+/// +class Mips64elTargetMachine : public MipsTargetMachine { +public: + Mips64elTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index cf5d1b5..05c46f5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -79,7 +79,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isMergeable1ByteCString()) return false; - const Type *Ty = GV->getType()->getElementType(); + Type *Ty = GV->getType()->getElementType(); return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); } diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index a8d6fe9..243632b 100644 --- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -9,13 +9,23 @@ #include "Mips.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMipsTarget, llvm::TheMipselTarget; +Target llvm::TheMips64Target, llvm::TheMips64elTarget; extern "C" void LLVMInitializeMipsTargetInfo() { - RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips"); + RegisterTarget<Triple::mips, + /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips"); - RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel"); + RegisterTarget<Triple::mipsel, + /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel"); + + RegisterTarget<Triple::mips64, + /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]"); + + RegisterTarget<Triple::mips64el, + /*HasJIT=*/false> B(TheMips64elTarget, + "mips64el", "Mips64el [experimental]"); } diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt deleted file mode 100644 index 331266d..0000000 --- a/contrib/llvm/lib/Target/PTX/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PTX.td) - -tablegen(PTXGenAsmWriter.inc -gen-asm-writer) -tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) -tablegen(PTXGenSubtarget.inc -gen-subtarget) - -add_llvm_target(PTXCodeGen - PTXAsmPrinter.cpp - PTXISelDAGToDAG.cpp - PTXISelLowering.cpp - PTXInstrInfo.cpp - PTXFrameLowering.cpp - PTXMCAsmInfo.cpp - PTXMCAsmStreamer.cpp - PTXMFInfoExtract.cpp - PTXRegisterInfo.cpp - PTXSubtarget.cpp - PTXTargetMachine.cpp - ) - -add_subdirectory(TargetInfo) diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp new file mode 100644 index 0000000..aabb404 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -0,0 +1,192 @@ +//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class prints a PTX MCInst to a .ptx file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "PTXInstPrinter.h" +#include "MCTargetDesc/PTXBaseInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "PTXGenAsmWriter.inc" + +PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printPredicate(MI, O); + switch (MI->getOpcode()) { + default: + printInstruction(MI, O); + break; + case PTX::CALL: + printCall(MI, O); + } + O << ";"; + printAnnotation(O, Annot); +} + +void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { + // The last two operands are the predicate operands + int RegIndex; + int OpIndex; + + if (MI->getOpcode() == PTX::CALL) { + RegIndex = 0; + OpIndex = 1; + } else { + RegIndex = MI->getNumOperands()-2; + OpIndex = MI->getNumOperands()-1; + } + + int PredOp = MI->getOperand(OpIndex).getImm(); + if (PredOp == PTXPredicate::None) + return; + + if (PredOp == PTXPredicate::Negate) + O << '!'; + else + O << '@'; + + printOperand(MI, RegIndex, O); +} + +void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { + O << "\tcall.uni\t"; + // The first two operands are the predicate slot + unsigned Index = 2; + unsigned NumRets = MI->getOperand(Index++).getImm(); + + if (NumRets > 0) { + O << "("; + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumRets; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + O << "), "; + } + + O << *(MI->getOperand(Index++).getExpr()) << ", ("; + + unsigned NumArgs = MI->getOperand(Index++).getImm(); + if (NumArgs > 0) { + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumArgs; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + } + O << ")"; +} + +void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + double Imm = Op.getFPImm(); + APFloat FPImm(Imm); + APInt FPIntImm = FPImm.bitcastToAPInt(); + O << "0D"; + // PTX requires us to output the full 64 bits, even if the number is zero + if (FPIntImm.getZExtValue() > 0) { + O << FPIntImm.toString(16, false); + } else { + O << "0000000000000000"; + } + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + const MCExpr *Expr = Op.getExpr(); + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + O << Sym.getName(); + } else { + O << *Op.getExpr(); + } + } +} + +void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // By definition, operand OpNo+1 is an i32imm + const MCOperand &Op2 = 
MI->getOperand(OpNo+1);
+  printOperand(MI, OpNo, O);
+  if (Op2.getImm() == 0)
+    return; // don't print "+0"
+  O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert (Op.isImm() && "Rounding modes must be immediate values");
+  switch (Op.getImm()) {
+  default:
+    llvm_unreachable("Unknown rounding mode!");
+  case PTXRoundingMode::RndDefault:
+    llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+    break;
+  case PTXRoundingMode::RndNone:
+    // Do not print anything.
+    break;
+  case PTXRoundingMode::RndNearestEven:
+    O << ".rn";
+    break;
+  case PTXRoundingMode::RndTowardsZero:
+    O << ".rz";
+    break;
+  case PTXRoundingMode::RndNegInf:
+    O << ".rm";
+    break;
+  case PTXRoundingMode::RndPosInf:
+    O << ".rp";
+    break;
+  case PTXRoundingMode::RndApprox:
+    O << ".approx";
+    break;
+  case PTXRoundingMode::RndNearestEvenInt:
+    O << ".rni";
+    break;
+  case PTXRoundingMode::RndTowardsZeroInt:
+    O << ".rzi";
+    break;
+  case PTXRoundingMode::RndNegInfInt:
+    O << ".rmi";
+    break;
+  case PTXRoundingMode::RndPosInfInt:
+    O << ".rpi";
+    break;
+  }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+  PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+  static const char *getInstructionName(unsigned Opcode);
+
+  // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printPredicate(const MCInst *MI, raw_ostream &O); + void printCall(const MCInst *MI, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index df0f63f..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMPTXDesc - PTXMCTargetDesc.cpp - PTXMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile deleted file mode 100644 index 35f5a7b..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h new file mode 100644 index 0000000..c6094be --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -0,0 +1,63 @@ +//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PTX target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXBASEINFO_H +#define PTXBASEINFO_H + +#include "PTXMCTargetDesc.h" + +namespace llvm { + namespace PTXStateSpace { + enum { + Global = 0, // default to global state space + Constant = 1, + Local = 2, + Parameter = 3, + Shared = 4 + }; + } // namespace PTXStateSpace + + namespace PTXPredicate { + enum { + Normal = 0, + Negate = 1, + None = 2 + }; + } // namespace PTXPredicate + + /// Namespace to hold all target-specific flags. 
+ namespace PTXRoundingMode { + // Instruction Flags + enum { + // Rounding Mode Flags + RndMask = 15, + RndDefault = 0, // --- + RndNone = 1, // <NONE> + RndNearestEven = 2, // .rn + RndTowardsZero = 3, // .rz + RndNegInf = 4, // .rm + RndPosInf = 5, // .rp + RndApprox = 6, // .approx + RndNearestEvenInt = 7, // .rni + RndTowardsZeroInt = 8, // .rzi + RndNegInfInt = 9, // .rmi + RndPosInfInt = 10 // .rpi + }; + } // namespace PTXII +} // namespace llvm + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 23f70bd..a5af3b8 100644 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "PTXMCTargetDesc.h" #include "PTXMCAsmInfo.h" +#include "InstPrinter/PTXInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "PTXGenInstrInfo.inc" @@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() { return X; } -extern "C" void LLVMInitializePTXMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); +static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + // PTX does not have a return address register. + InitPTXMCRegisterInfo(X, 0); + return X; } static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializePTXMCSubtargetInfo() { +static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createPTXMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + assert(SyntaxVariant == 0 && "We only have one syntax variant"); + return new PTXInstPrinter(MAI, STI); +} + +extern "C" void LLVMInitializePTXTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, createPTXMCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, createPTXMCSubtargetInfo); -} -extern "C" void LLVMInitializePTXMCAsmInfo() { - RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); - RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h index 28cab24..7d46cce 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.h +++ b/contrib/llvm/lib/Target/PTX/PTX.h @@ -15,34 +15,30 @@ #ifndef PTX_H #define PTX_H -#include "MCTargetDesc/PTXMCTargetDesc.h" +#include "MCTargetDesc/PTXBaseInfo.h" #include "llvm/Target/TargetMachine.h" namespace llvm { + class MachineInstr; + class MCInst; + class PTXAsmPrinter; class PTXTargetMachine; class FunctionPass; - namespace PTX { - enum StateSpace { - GLOBAL = 0, // default to global state space - CONSTANT = 1, - LOCAL = 2, - PARAMETER = 3, - SHARED = 4 - }; - - enum Predicate { - PRED_NORMAL = 0, - PRED_NEGATE = 1 - }; - } // namespace PTX - FunctionPass *createPTXISelDag(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + FunctionPass *createPTXRegisterAllocator(); + + void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP); + } // namespace llvm; #endif // PTX_H diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td index f6fbe9f..693bb9c 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.td +++ b/contrib/llvm/lib/Target/PTX/PTX.td @@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", "Use Shader Model 1.3">; def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", - "Use Shader Model 2.0">; + "Use Shader Model 2.0", [FeatureDouble]>; def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", - "Use Shader Model 2.1">; + "Use Shader Model 2.1", [FeatureDouble]>; def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", - "Use Shader Model 2.2">; + "Use Shader Model 2.2", [FeatureDouble]>; def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", - "Use Shader Model 2.3">; + "Use Shader Model 2.3", [FeatureDouble]>; def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", "PTX_COMPUTE_1_0", @@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", "Use Compute Compatibility 1.3">; def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", "PTX_COMPUTE_2_0", - "Use Compute Compatibility 2.0">; + "Use Compute Compatibility 2.0", + [FeatureDouble]>; //===----------------------------------------------------------------------===// // PTX supported processors @@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; include "PTXRegisterInfo.td" //===----------------------------------------------------------------------===// -// Calling Conventions -//===----------------------------------------------------------------------===// - -include "PTXCallingConv.td" - -//===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// @@ -127,9 +122,20 @@ include "PTXInstrInfo.td" def PTXInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// PTX 
uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def PTXAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def PTX : Target { let InstructionSet = PTXInstrInfo; + let AssemblyWriters = [PTXAsmWriter]; } diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp index 2848d54..733744b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp @@ -15,9 +15,14 @@ #define DEBUG_TYPE "ptx-asm-printer" #include "PTX.h" +#include "PTXAsmPrinter.h" #include "PTXMachineFunctionInfo.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" #include "PTXTargetMachine.h" +#include "llvm/Argument.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -28,69 +33,32 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace { -class PTXAsmPrinter : public AsmPrinter { -public: - explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - const char *getPassName() const { return "PTX Assembly Printer"; } - - bool doFinalization(Module &M); - - virtual void EmitStartOfAsmFile(Module &M); - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); } - - virtual void EmitInstruction(const MachineInstr *MI); - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); - - unsigned GetOrCreateSourceID(StringRef FileName, - StringRef DirName); - - // autogen'd. 
- void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - -private: - void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(); - - StringMap<unsigned> SourceIdMap; -}; // class PTXAsmPrinter -} // namespace - static const char PARAM_PREFIX[] = "__param_"; static const char RETURN_PREFIX[] = "__ret_"; -static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ - if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; +static const char *getRegisterTypeName(unsigned RegNo, + const MachineRegisterInfo& MRI) { + const TargetRegisterClass *TRC = MRI.getRegClass(RegNo); + +#define TEST_REGCLS(cls, clsstr) \ + if (PTX::cls ## RegisterClass == TRC) return # clsstr; + TEST_REGCLS(RegPred, pred); TEST_REGCLS(RegI16, b16); TEST_REGCLS(RegI32, b32); @@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) { static const char *getStateSpaceName(unsigned addressSpace) { switch (addressSpace) { default: llvm_unreachable("Unknown state space"); - case PTX::GLOBAL: return "global"; - case PTX::CONSTANT: return "const"; - case PTX::LOCAL: return "local"; - case PTX::PARAMETER: return "param"; - case PTX::SHARED: return "shared"; + case PTXStateSpace::Global: return "global"; + case PTXStateSpace::Constant: return "const"; + case PTXStateSpace::Local: return "local"; + case PTXStateSpace::Parameter: return "param"; + case PTXStateSpace::Shared: return "shared"; } return NULL; } -static const char *getTypeName(const Type* type) { +static const char *getTypeName(Type* type) { while (true) { switch (type->getTypeID()) { default: llvm_unreachable("Unknown type"); @@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) { } case Type::ArrayTyID: case Type::PointerTyID: - type = dyn_cast<const SequentialType>(type)->getElementType(); + type = dyn_cast<SequentialType>(type)->getElementType(); break; } } @@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + // Emit the PTX .version and .target attributes OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + (ST.supportsDouble() ? 
"" @@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) EmitVariableDeclaration(i); } -bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - EmitFunctionDeclaration(); - EmitFunctionBody(); - return false; -} - void PTXAsmPrinter::EmitFunctionBodyStart() { OutStreamer.EmitRawText(Twine("{")); const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); + + // Print register definitions + std::string regDefs; + unsigned numRegs; + + // pred + numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .pred %p<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i16 + numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b16 %rh<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i32 + numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b32 %r<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i64 + numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b64 %rd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f32 + numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f32 %f<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f64 + numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f64 %fd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } - // Print local variable definition - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) { - unsigned reg = *i; - - std::string def = "\t.reg ."; - def += getRegisterTypeName(reg); - def += ' '; - def += getRegisterName(reg); - def += ';'; - OutStreamer.EmitRawText(Twine(def)); + // Local params + for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); + i != e; ++i) { + regDefs += "\t.param .b"; + regDefs += utostr(PM.getParamSize(*i)); + regDefs += " "; + regDefs += PM.getParamName(*i); + regDefs += ";\n"; } + OutStreamer.EmitRawText(Twine(regDefs)); + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n"); for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); if (FrameInfo->getObjectSize(i) > 0) { - std::string def = "\t.reg .b"; - def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits - def += " s"; + std::string def = "\t.local .align "; + def += utostr(FrameInfo->getObjectAlignment(i)); + def += " .b8"; + def += " __local"; def += utostr(i); + def += "["; + def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits + def += "]"; def += ";"; OutStreamer.EmitRawText(Twine(def)); } } -} - -void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { - std::string str; - str.reserve(64); - - raw_string_ostream OS(str); - - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - - const MDNode *S = DL.getScope(MF->getFunction()->getContext()); - - // This is taken from DwarfDebug.cpp, which is conveniently not a public - // LLVM class. 
- StringRef Fn; - StringRef Dir; - unsigned Src = 1; - if (S) { - DIDescriptor Scope(S); - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Fn = CU.getFilename(); - Dir = CU.getDirectory(); - } else if (Scope.isFile()) { - DIFile F(S); - Fn = F.getFilename(); - Dir = F.getDirectory(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Fn = SP.getFilename(); - Dir = SP.getDirectory(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Fn = DB.getFilename(); - Dir = DB.getDirectory(); - } else - assert(0 && "Unexpected scope info"); - - Src = GetOrCreateSourceID(Fn, Dir); - } - OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), - 0, 0, 0, Fn); - - const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); - - OS << "\t.loc "; - OS << utostr(MDL.getFileNum()); - OS << " "; - OS << utostr(MDL.getLine()); - OS << " "; - OS << utostr(MDL.getColumn()); - OS << "\n"; - } - - - // Emit predicate - printPredicateOperand(MI, OS); - - // Write instruction to str - printInstruction(MI, OS); - OS << ';'; - OS.flush(); - - StringRef strref = StringRef(str); - OutStreamer.EmitRawText(strref); -} - -void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS) { - const MachineOperand &MO = MI->getOperand(opNum); - - switch (MO.getType()) { - default: - llvm_unreachable("<unknown operand type>"); - break; - case MachineOperand::MO_GlobalAddress: - OS << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_Immediate: - OS << (long) MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - OS << *MO.getMBB()->getSymbol(); - break; - case MachineOperand::MO_Register: - OS << getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_FPImmediate: - APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); - bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; - // Emit 0F for 32-bit floats and 0D for 64-bit doubles. - if (isFloat) { - OS << "0F"; - } - else { - OS << "0D"; - } - // Emit the encoded floating-point value. - if (constFP.getZExtValue() > 0) { - OS << constFP.toString(16, false); - } - else { - OS << "00000000"; - // If We have a double-precision zero, pad to 8-bytes. 
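The operand printer being deleted here wrote floating-point immediates in PTX's hexadecimal literal form: "0F" followed by the bit pattern of a 32-bit float, "0D" followed by the bit pattern of a 64-bit double, with a special case that padded a double-precision zero out to the full width. A standalone sketch that produces the same fixed-width form directly (encodePTXFloat/encodePTXDouble are illustrative helpers, not LLVM code):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Encode a 32-bit float as PTX's "0F" + 8 hex digits of its bit pattern.
    void encodePTXFloat(float F, char *Buf, std::size_t N) {
      std::uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      std::snprintf(Buf, N, "0F%08X", Bits);
    }

    // Encode a 64-bit double as PTX's "0D" + 16 hex digits of its bit pattern.
    void encodePTXDouble(double D, char *Buf, std::size_t N) {
      std::uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      std::snprintf(Buf, N, "0D%016llX", static_cast<unsigned long long>(Bits));
    }

    int main() {
      char Buf[32];
      encodePTXFloat(1.0f, Buf, sizeof(Buf));   // prints 0F3F800000
      std::puts(Buf);
      encodePTXDouble(0.0, Buf, sizeof(Buf));   // prints 0D0000000000000000
      std::puts(Buf);
    }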
- if (!isFloat) { - OS << "00000000"; - } - } - break; - } -} - -void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - printOperand(MI, opNum, OS); - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; // don't print "+0" - - OS << "+"; - printOperand(MI, opNum+1, OS); + //unsigned Index = 1; + // Print parameter passing params + //for (PTXMachineFunctionInfo::param_iterator + // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { + // std::string def = "\t.param .b"; + // def += utostr(*i); + // def += " __ret_"; + // def += utostr(Index); + // Index++; + // def += ";"; + // OutStreamer.EmitRawText(Twine(def)); + //} } -void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(Twine("}")); } -void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst TmpInst; + LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); } void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { @@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { unsigned alignment = gv->getAlignment(); if (alignment != 0) { decl += ".align "; - decl += utostr(Log2_32(gv->getAlignment())); + decl += utostr(gv->getAlignment()); decl += " "; } if (PointerType::classof(gv->getType())) { - const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType()); - const Type* elementTy = pointerTy->getElementType(); + PointerType* pointerTy = dyn_cast<PointerType>(gv->getType()); + Type* elementTy = pointerTy->getElementType(); decl += ".b8 "; decl += gvsym->getName(); @@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy); + ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); unsigned numElements = arrayTy->getNumElements(); while (elementTy->isArrayTy()) { - arrayTy = dyn_cast<const ArrayType>(elementTy); + arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); numElements *= arrayTy->getNumElements(); @@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { if (gv->hasInitializer()) { - const Constant *C = gv->getInitializer(); + const Constant *C = gv->getInitializer(); if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { decl += " = {"; @@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { OutStreamer.AddBlankLine(); } -void PTXAsmPrinter::EmitFunctionDeclaration() { +void PTXAsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. 
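EmitFunctionEntryLabel below builds the .entry/.func signature out of .param declarations whose names come from the new PTXParamManager, or from named virtual registers for device functions on targets that do not use the param space. The PTXParamManager itself is added elsewhere in this patch, so the following is only a minimal counter-based stand-in consistent with the __param_N/__ret_N names visible in this file:

    #include <string>
    #include <vector>

    // Toy parameter table: records each parameter's size in bits and hands
    // back the names used in the emitted PTX (__param_1, __param_2, ...,
    // __ret_1, ...), numbered from 1 as in the printing code in this file.
    class ParamTable {
      std::vector<unsigned> ArgSizes, RetSizes;
    public:
      unsigned addArgument(unsigned Bits) {
        ArgSizes.push_back(Bits);
        return static_cast<unsigned>(ArgSizes.size());
      }
      unsigned addReturn(unsigned Bits) {
        RetSizes.push_back(Bits);
        return static_cast<unsigned>(RetSizes.size());
      }
      std::string argName(unsigned Idx) const { return "__param_" + std::to_string(Idx); }
      std::string retName(unsigned Idx) const { return "__ret_" + std::to_string(Idx); }
      // e.g. addArgument(32) followed by argDecl(1) yields ".param .b32 __param_1"
      std::string argDecl(unsigned Idx) const {
        return ".param .b" + std::to_string(ArgSizes[Idx - 1]) + " " + argName(Idx);
      }
    };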
if (!CurrentFnSym->isUndefined()) { @@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { } const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); const bool isKernel = MFI->isKernel(); const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + const MachineRegisterInfo& MRI = MF->getRegInfo(); std::string decl = isKernel ? ".entry" : ".func"; - unsigned cnt = 0; - if (!isKernel) { decl += " ("; - for (PTXMachineFunctionInfo::ret_iterator - i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; + if (ST.useParamSpaceForDeviceArgs()) { + for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + + decl += ".param .b"; + decl += utostr(PM.getParamSize(*i)); + decl += " "; + decl += PM.getParamName(*i); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i, MRI); + decl += " "; + decl += MFI->getRegisterName(*i); } - decl += ".reg ."; - decl += getRegisterTypeName(*i); - decl += " "; - decl += getRegisterName(*i); } decl += ")"; } @@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " ("; - cnt = 0; + const Function *F = MF->getFunction(); // Print parameters - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; - } - if (isKernel || ST.useParamSpaceForDeviceArgs()) { + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".param .b"; - decl += utostr(*i); + decl += utostr(PM.getParamSize(*i)); decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } else { + decl += PM.getParamName(*i); + }*/ + int Counter = 1; + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), + b = i; i != e; ++i) { + if (i != b) + decl += ", "; + const Type *ArgType = (*i).getType(); + decl += ".param .b"; + if (ArgType->isPointerTy()) { + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + } else { + decl += utostr(ArgType->getPrimitiveSizeInBits()); + } + if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { + const PointerType *PtrType = dyn_cast<const PointerType>(ArgType); + decl += " .ptr"; + switch (PtrType->getAddressSpace()) { + default: + llvm_unreachable("Unknown address space in argument"); + case PTXStateSpace::Global: + decl += " .global"; + break; + case PTXStateSpace::Shared: + decl += " .shared"; + break; + } + } + decl += " __param_"; + decl += utostr(Counter++); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; - decl += getRegisterTypeName(*i); + decl += getRegisterTypeName(*i, MRI); decl += " "; - decl += getRegisterName(*i); + decl += MFI->getRegisterName(*i); } } decl += ")"; @@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { OutStreamer.EmitRawText(Twine(decl)); } -void PTXAsmPrinter:: -printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { - int i = MI->findFirstPredOperandIdx(); - if (i == -1) - llvm_unreachable("missing predicate operand"); - - unsigned 
reg = MI->getOperand(i).getReg(); - int predOp = MI->getOperand(i+1).getImm(); - - DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); - - if (reg != PTX::NoRegister) { - O << '@'; - if (predOp == PTX::PRED_NEGATE) - O << '!'; - O << getRegisterName(reg); - } -} - unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. @@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, return SrcId; } -#include "PTXGenAsmWriter.inc" +MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, + const MCSymbol *Symbol) { + const MCExpr *Expr; + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + return MCOperand::CreateExpr(Expr); +} + +MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { + MCOperand MCOp; + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const MCExpr *Expr; + const char *RegSymbolName; + switch (MO.getType()) { + default: + llvm_unreachable("Unknown operand type"); + case MachineOperand::MO_Register: + // We create register operands as symbols, since the PTXInstPrinter class + // has no way to map virtual registers back to a name without some ugly + // hacks. + // FIXME: Figure out a better way to handle virtual register naming. + RegSymbolName = MFI->getRegisterName(MO.getReg()); + Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None, + OutContext); + MCOp = MCOperand::CreateExpr(Expr); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_FPImmediate: + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } + + return MCOp; +} // Force static initialization. extern "C" void LLVMInitializePTXAsmPrinter() { RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); } + diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h new file mode 100644 index 0000000..538c080 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h @@ -0,0 +1,57 @@ +//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PTX Assembly printer class. 
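lowerOperand above emits virtual registers as named symbols because the MC-layer PTXInstPrinter cannot map virtual register numbers back to names on its own. The actual name table lives in PTXMachineFunctionInfo::getRegisterName, which is not part of this hunk; purely as an illustration, a per-class counter scheme consistent with the %p/%rh/%r/%rd/%f/%fd prefixes declared in EmitFunctionBodyStart could look like this:

    #include <map>
    #include <string>

    // Made-up illustration: name each virtual register by its class prefix plus
    // a class-local counter, matching the ".reg .b32 %r<N>;" style declarations.
    class RegNamer {
      std::map<unsigned, std::string> Names;       // vreg -> printable name
      std::map<std::string, unsigned> NextIndex;   // class prefix -> next number
    public:
      const std::string &name(unsigned VReg, const std::string &Prefix) {
        auto It = Names.find(VReg);
        if (It == Names.end())
          It = Names.emplace(VReg, Prefix + std::to_string(NextIndex[Prefix]++)).first;
        return It->second;
      }
    };
    // e.g. name(v0, "%r") -> "%r0", name(v1, "%r") -> "%r1", name(v2, "%fd") -> "%fd0"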
+// +//===----------------------------------------------------------------------===// + +#ifndef PTXASMPRINTER_H +#define PTXASMPRINTER_H + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter { +public: + explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + const char *getPassName() const { return "PTX Assembly Printer"; } + + bool doFinalization(Module &M); + + virtual void EmitStartOfAsmFile(Module &M); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + virtual void EmitFunctionEntryLabel(); + virtual void EmitInstruction(const MachineInstr *MI); + + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + + MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); + MCOperand lowerOperand(const MachineOperand &MO); + +private: + void EmitVariableDeclaration(const GlobalVariable *gv); + void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; +}; // class PTXAsmPrinter +} // namespace llvm + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td deleted file mode 100644 index 3e3ff48..0000000 --- a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td +++ /dev/null @@ -1,29 +0,0 @@ - -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. 
-// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, - CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, - CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, - CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, - CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, - CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, - CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> -]>; diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp 
new file mode 100644 index 0000000..0b653e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -0,0 +1,179 @@ +//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a machine function pass that sets appropriate FP rounding +// modes for all relevant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-fp-rounding-mode" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +// NOTE: PTXFPRoundingModePass should be executed just before emission. + +namespace llvm { + /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to + /// all FP instructions. Essentially, this pass just looks for all FP + /// instructions that have a rounding mode set to RndDefault, and sets an + /// appropriate rounding mode based on the target device. + /// + class PTXFPRoundingModePass : public MachineFunctionPass { + private: + static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; + + public: + PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID), + TargetMachine(TM) { + initializeMap(); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX FP Rounding Mode Pass"; + } + + private: + + void initializeMap(); + void processInstruction(MachineInstr &MI); + }; // class PTXFPRoundingModePass +} // namespace llvm + +using namespace llvm; + +char PTXFPRoundingModePass::ID = 0; + +bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { + // Look at each basic block + for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; + ++bbi) { + MachineBasicBlock &MBB = *bbi; + // Look at each instruction + for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); + ii != ie; ++ii) { + MachineInstr &MI = *ii; + processInstruction(MI); + } + } + return false; +} + +void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; + const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. 
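initializeMap below fills a table keyed by opcode whose value is (index of the rounding-mode operand, mode to use when that operand is still RndDefault); processInstruction then rewrites exactly those operands. A cut-down standalone sketch of that rewrite with a toy instruction type in place of MachineInstr (the enum values shown are the subset actually defined in PTXBaseInfo.h):

    #include <map>
    #include <utility>
    #include <vector>

    namespace PTXRoundingMode {
      enum { RndDefault = 0, RndNone = 1, RndNearestEven = 2 };  // subset of the flags
    }

    struct ToyInstr {
      int Opcode;
      std::vector<int> Operands;   // immediate operands only, for the sketch
    };

    // (rounding-mode operand index, mode substituted for RndDefault)
    using RndModeDesc = std::pair<unsigned, unsigned>;

    void assignRoundingMode(ToyInstr &MI, const std::map<int, RndModeDesc> &Table) {
      auto It = Table.find(MI.Opcode);
      if (It == Table.end())
        return;                                       // instruction takes no rounding mode
      int &Op = MI.Operands[It->second.first];
      if (Op == PTXRoundingMode::RndDefault)
        Op = static_cast<int>(It->second.second);     // e.g. FADD defaults to .rn
    }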
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f64] = std::make_pair(1U, 
(unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? + if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); + } + } +} + +FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXFPRoundingModePass(TM, OptLevel); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp index 9adfa62..5c7ee29 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "PTX.h" +#include "PTXMachineFunctionInfo.h" #include "PTXTargetMachine.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Debug.h" @@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel { bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); // Include the pieces auto'gened from the target description #include "PTXGenDAGISel.inc" @@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel { // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td SDNode *SelectBRCOND(SDNode *Node); + SDNode *SelectREADPARAM(SDNode *Node); + SDNode *SelectWRITEPARAM(SDNode *Node); + SDNode *SelectFrameIndex(SDNode *Node); + bool isImm(const SDValue &operand); bool SelectImm(const SDValue &operand, SDValue &imm); @@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { switch (Node->getOpcode()) { case ISD::BRCOND: return SelectBRCOND(Node); + case PTXISD::READ_PARAM: + return SelectREADPARAM(Node); + case PTXISD::WRITE_PARAM: + return SelectWRITEPARAM(Node); + case ISD::FrameIndex: + return SelectFrameIndex(Node); default: return SelectCode(Node); } @@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { SDValue Chain = Node->getOperand(0); SDValue Pred = Node->getOperand(1); SDValue Target = Node->getOperand(2); // branch target - SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32); DebugLoc dl = Node->getDebugLoc(); assert(Target.getOpcode() == ISD::BasicBlock); @@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); } +SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDValue Index = Node->getOperand(1); + + int OpCode; + + // Get the type of parameter we are reading + EVT VT = Node->getValueType(0); + assert(VT.isSimple() && "READ_PARAM only implemented for MVT types"); + + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::READPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::READPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::READPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::READPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::READPARAMF32; + else { + assert(Type == MVT::f64 && "Unexpected type!"); + OpCode = PTX::READPARAMF64; + } + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Index, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4); +} + +SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { + + SDValue Chain = Node->getOperand(0); + SDValue Value = Node->getOperand(1); + + int OpCode; + + //Node->dumpr(CurDAG); + + // Get the type of parameter we are writing + EVT VT = Value->getValueType(0); + assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types"); + + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::WRITEPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::WRITEPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::WRITEPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::WRITEPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::WRITEPARAMF32; + else if (Type == MVT::f64) + OpCode = PTX::WRITEPARAMF64; + else + llvm_unreachable("Invalid type in SelectWRITEPARAM"); + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Value, Pred, PredOp, Chain }; + SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4); + + //dbgs() << "SelectWRITEPARAM produced:\n\t"; + //Ret->dumpr(CurDAG); + + return Ret; +} + +SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + //dbgs() << "Selecting 
FrameIndex at index " << FI << "\n"; + //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); + + PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + + SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI), + Node->getValueType(0)); + + return FrameSymbol.getNode(); +} + // Match memory operand of the form [reg+reg] bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || @@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { // Match memory operand of the form [reg], [imm+reg], and [reg+imm] bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { + // FrameIndex addresses are handled separately + //errs() << "SelectADDRri: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + //errs() << "Failure\n"; + return false; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Success\n"; + return true; + } + + /*if (Addr.getNumOperands() == 1) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + errs() << "Success\n"; + return true; + }*/ + + //errs() << "SelectADDRri fails on: "; + //Addr.getNode()->dumpr(); + + if (isImm(Addr)) { + //errs() << "Failure\n"; + return false; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + //errs() << "Success\n"; + return true; + + /*if (Addr.getOpcode() != ISD::ADD) { // let SelectADDRii handle the [imm] case if (isImm(Addr)) return false; // it is [reg] assert(Addr.getValueType().isSimple() && "Type must be simple"); - Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); @@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, } // neither [reg+imm] nor [imm+reg] - return false; + return false;*/ } // Match memory operand of the form [imm+imm] and [imm] @@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, return false; } +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + //errs() << "SelectADDRlocal: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + //errs() << "Success\n"; + return true; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (!isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Offset: "; + //Offset.getNode()->dumpr(); + //errs() << "Success\n"; + return true; + } + + //errs() << "Failure\n"; + return false; +} + bool PTXDAGToDAGISel::isImm(const SDValue &operand) { return ConstantSDNode::classof(operand.getNode()); } diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp index 6fcf710..3307d91 100644 --- 
a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -16,23 +16,19 @@ #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" #include "PTXSubtarget.h" +#include "llvm/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "PTXGenCallingConv.inc" - -//===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setMinFunctionAlignment(2); - + //////////////////////////////////// /////////// Expansion ////////////// //////////////////////////////////// - + // (any/zero/sign) extload => load + (any/zero/sign) extend - + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - + // f32 extload => load + fextend - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // f64 truncstore => trunc + store - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // sign_extend_inreg => sign_extend - + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - + // br_cc => brcond - + setOperationAction(ISD::BR_CC, MVT::Other, Expand); // select_cc => setcc - + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - + //////////////////////////////////// //////////// Legal ///////////////// //////////////////////////////////// - + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - + //////////////////////////////////// //////////// Custom //////////////// //////////////////////////////////// - + // customise setcc to use bitwise logic if possible - + setOperationAction(ISD::SETCC, MVT::i1, Custom); // customize translation of memory addresses - + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); @@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) computeRegisterProperties(); } -MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { +EVT PTXTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } @@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PTXISD::LOAD_PARAM"; case PTXISD::STORE_PARAM: return "PTXISD::STORE_PARAM"; + case PTXISD::READ_PARAM: + return "PTXISD::READ_PARAM"; + case PTXISD::WRITE_PARAM: + return "PTXISD::WRITE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: 
return "PTXISD::RET"; + case PTXISD::CALL: + return "PTXISD::CALL"; } } @@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - // Look for X == 0, X == 1, X != 0, or X != 1 + // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic if (Op1.getOpcode() == ISD::Constant && @@ -197,6 +200,7 @@ SDValue PTXTargetLowering:: MachineFunction &MF = DAG.getMachineFunction(); const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); switch (CallConv) { default: @@ -216,68 +220,34 @@ SDValue PTXTargetLowering:: if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { // We just need to emit the proper LOAD_PARAM ISDs for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && "Kernels cannot take pred operands"); + unsigned ParamSize = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addArgumentParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); + ParamValue); InVals.push_back(ArgValue); - - // Instead of storing a physical register in our argument list, we just - // store the total size of the parameter, in bits. The ASM printer - // knows how to process this. - MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); } } else { - // For device functions, we use the PTX calling convention to do register - // assignments then create CopyFromReg ISDs for the allocated registers - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, - *DAG.getContext()); - - CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - - CCValAssign& VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - TargetRegisterClass* TRC = 0; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - - // Determine which register class we need - if (RegVT == MVT::i1) { - TRC = PTX::RegPredRegisterClass; - } - else if (RegVT == MVT::i16) { - TRC = PTX::RegI16RegisterClass; - } - else if (RegVT == MVT::i32) { - TRC = PTX::RegI32RegisterClass; - } - else if (RegVT == MVT::i64) { - TRC = PTX::RegI64RegisterClass; - } - else if (RegVT == MVT::f32) { - TRC = PTX::RegF32RegisterClass; - } - else if (RegVT == MVT::f64) { - TRC = PTX::RegF64RegisterClass; - } - else { - llvm_unreachable("Unknown parameter type"); - } + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT RegVT = Ins[i].VT; + TargetRegisterClass* TRC = getRegClassFor(RegVT); + // Use a unique index in the instruction to prevent instruction folding. + // Yes, this is a hack. 
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32); unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain, + Index); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); InVals.push_back(ArgValue); - MFI->addArgReg(VA.getLocReg()); + MFI->addArgReg(Reg); } } @@ -301,41 +271,66 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - //assert(Outs.size() <= 1 && "Can at most return one value."); + assert(Outs.size() <= 1 && "Can at most return one value."); break; } MachineFunction& MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); SDValue Flag; + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); - // Even though we could use the .param space for return arguments for - // device functions if SM >= 2.0 and the number of return arguments is - // only 1, we just always use registers since this makes the codegen - // easier. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeReturn(Outs, RetCC_PTX); - - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign& VA = RVLocs[i]; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + if (ST.useParamSpaceForDeviceArgs()) { + assert(Outs.size() < 2 && "Device functions can return at most one value"); + + if (Outs.size() == 1) { + unsigned ParamSize = OutVals[0].getValueType().getSizeInBits(); + unsigned Param = PM.addReturnParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[0]); + } + } else { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT RegVT = Outs[i].VT; + TargetRegisterClass* TRC = 0; - unsigned Reg = VA.getLocReg(); + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } - DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/); + SDValue OutReg = DAG.getRegister(Reg, RegVT); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - MFI->addRetReg(Reg); + MFI->addRetReg(Reg); + } } if (Flag.getNode() == 0) { @@ -345,3 +340,83 @@ SDValue PTXTargetLowering:: return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } } + +SDValue +PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + 
const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); + + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && + "Calls are not handled for the target device"); + + std::vector<SDValue> Ops; + // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] + Ops.resize(Outs.size() + Ins.size() + 4); + + Ops[0] = Chain; + + // Identify the callee function + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device && + "PTX function calls must be to PTX device functions"); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + Ops[Ins.size()+2] = Callee; + + // Generate STORE_PARAM nodes for each function argument. In PTX, function + // arguments are explicitly stored into .param variables and passed as + // arguments. There is no register/stack-based calling convention in PTX. + Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32); + for (unsigned i = 0; i != OutVals.size(); ++i) { + unsigned Size = OutVals[i].getValueType().getSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[i]); + Ops[i+Ins.size()+4] = ParamValue; + } + + std::vector<SDValue> InParams; + + // Generate list of .param variables to hold the return value(s). + Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32); + for (unsigned i = 0; i < Ins.size(); ++i) { + unsigned Size = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Ops[i+2] = ParamValue; + InParams.push_back(ParamValue); + } + + Ops[0] = Chain; + + // Create the CALL node. + Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size()); + + // Create the LOAD_PARAM nodes that retrieve the function return value(s). + for (unsigned i = 0; i < Ins.size(); ++i) { + SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + InParams[i]); + InVals.push_back(Load); + } + + return Chain; +} + +unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) { + // All arguments consist of one "register," regardless of the type. 
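LowerCall above lays the CALL node's operands out as [Chain, #Ins, Ins..., Callee, #Outs, Outs...], so the callee sits at index Ins.size()+2 and the i-th outgoing .param at Ins.size()+4+i. A tiny standalone check of that index arithmetic (plain integers, no SelectionDAG types):

    #include <cassert>
    #include <cstddef>

    // Operand layout of the PTX CALL node: [Chain, #Ins, Ins..., Callee, #Outs, Outs...]
    std::size_t calleeIndex(std::size_t NumIns) { return NumIns + 2; }
    std::size_t outParamIndex(std::size_t NumIns, std::size_t I) { return NumIns + 4 + I; }
    std::size_t totalOps(std::size_t NumIns, std::size_t NumOuts) { return NumIns + NumOuts + 4; }

    int main() {
      // e.g. one return value and two outgoing arguments
      assert(calleeIndex(1) == 3);        // Ops[3] holds the callee
      assert(outParamIndex(1, 0) == 5);   // first argument .param
      assert(totalOps(1, 2) == 7);        // matches Ops.resize(Outs.size() + Ins.size() + 4)
      return 0;
    }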
+ return 1; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h index 4318541..4d25665 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h @@ -26,9 +26,12 @@ namespace PTXISD { FIRST_NUMBER = ISD::BUILTIN_OP_END, LOAD_PARAM, STORE_PARAM, + READ_PARAM, + WRITE_PARAM, EXIT, RET, - COPY_ADDRESS + COPY_ADDRESS, + CALL }; } // namespace PTXISD @@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering { DebugLoc dl, SelectionDAG &DAG) const; - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual EVT getSetCCResultType(EVT VT) const; + + virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT); private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td index 8cee351..397fdc3 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td +++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td @@ -7,12 +7,39 @@ // //===----------------------------------------------------------------------===// -// PTX Predicate operand, default to (0, 0) = (zero-reg, always). + +// Rounding Mode Specifier +/*class RoundingMode<bits<3> val> { + bits<3> Value = val; +} + +def RndDefault : RoundingMode<0>; +def RndNearestEven : RoundingMode<1>; +def RndNearestZero : RoundingMode<2>; +def RndNegInf : RoundingMode<3>; +def RndPosInf : RoundingMode<4>; +def RndApprox : RoundingMode<5>;*/ + + +// Rounding Mode Operand +def RndMode : Operand<i32> { + let PrintMethod = "printRoundingMode"; +} + +def RndDefault : PatLeaf<(i32 0)>; + +// PTX Predicate operand, default to (0, 0) = (zero-reg, none). // Leave PrintMethod empty; predicate printing is defined elsewhere. 
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), - (ops (i1 zero_reg), (i32 0))>; + (ops (i1 zero_reg), (i32 2))>; +def RndModeOperand : Operand<OtherVT> { + let MIOperandInfo = (ops i32imm); +} + +// Instruction Types let Namespace = "PTX" in { + class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern> : Instruction { dag OutOperandList = oops; diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp index 425265a..1b947a5 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp @@ -16,10 +16,11 @@ #include "PTX.h" #include "PTXInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR @@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { - if (map[i].cls->contains(DstReg, SrcReg)) { + + const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo(); + //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) && + // "Invalid register copy between two register classes"); + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) { + if (map[i].cls == MRI.getRegClass(DstReg)) { const MCInstrDesc &MCID = get(map[i].opcode); MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); @@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI, return false; Pred.push_back(MO); - Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None)); return true; } @@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(PTX::BRAdp)) .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 2; } else if (Cond.size()) { BuildMI(&MBB, DL, get(PTX::BRAdp)) @@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB, return 1; } else { BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 1; } } @@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "storeRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKSTOREI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKSTOREF32; - } else if (RC == PTX::RegF64RegisterClass) { - OpCode = PTX::STACKSTOREF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the store instruction (really a mov) - MachineInstrBuilder 
MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addFrameIndex(FrameIdx); - MIB.addReg(SrcReg); - - AddDefaultPredicate(MIB); + assert(false && "storeRegToStackSlot should not be called for PTX"); } void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "loadRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKLOADI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKLOADF32; - } else if (RC == PTX::RegF64RegisterClass) { - OpCode = PTX::STACKLOADF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the load instruction (really a mov) - MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addReg(DestReg); - MIB.addFrameIndex(FrameIdx); - - AddDefaultPredicate(MIB); + assert(false && "loadRegFromStackSlot should not be called for PTX"); } // static helper routines @@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, Op2, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { if (MI->findFirstPredOperandIdx() == -1) { MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None)); } } diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td index 6bfe906..a3fcea9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td @@ -21,10 +21,6 @@ include "PTXInstrFormats.td" // Code Generation Predicates //===----------------------------------------------------------------------===// -// Addressing -def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; -def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; - // Shader Model Support def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; @@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; def SupportsFMA : 
Predicate<"getSubtarget().supportsFMA()">; def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; -//===----------------------------------------------------------------------===// -// Instruction Pattern Stuff -//===----------------------------------------------------------------------===// -def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::CONSTANT; - return false; -}]>; - -def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -def store_global - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def store_local - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def store_parameter - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def store_shared - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -// Addressing modes. 
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; -def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; -def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; -def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; - -// Address operands -def MEMri32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI32, i32imm); -} -def MEMri64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI64, i64imm); -} -def MEMii32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} -def MEMii64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i64imm, i64imm); -} -// The operand here does not correspond to an actual address, so we -// can use i32 in 64-bit address modes. -def MEMpi : Operand<i32> { - let PrintMethod = "printParamOperand"; - let MIOperandInfo = (ops i32imm); -} -def MEMret : Operand<i32> { - let PrintMethod = "printReturnOperand"; - let MIOperandInfo = (ops i32imm); -} + +// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart, +// [SDNPHasChain, SDNPOutGlue]>; +// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd, +// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def PTXcall : SDNode<"PTXISD::CALL", SDTNone, + [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>; + // Branch & call targets have OtherVT type. def brtarget : Operand<OtherVT>; @@ -189,87 +74,73 @@ def PTXret def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; -// Load/store .param space -def PTXloadparam - : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; -def PTXstoreparam - : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// +// For floating-point instructions, we cannot just embed the pattern into the +// instruction definition since we need to muck around with the rounding mode, +// and I do not know how to insert constants into instructions directly from +// pattern matches. 
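(Aside on the new RndMode operand: rounding is now an explicit i32 operand on the floating-point instructions, supplied later through selection patterns such as (FADDrr32 RndDefault, ...), and the "$r" in the asm strings is expanded by the printRoundingMode PrintMethod. The printer itself is not part of this hunk; the sketch below is only a plausible mapping, assuming the encoding hinted at by the commented-out RoundingMode class (0 = default, 1-5 = nearest-even, toward-zero, -inf, +inf, approx) and the usual PTX rounding suffixes.)

#include <cstdio>

// Hypothetical expansion of the RndMode immediate into a PTX suffix.
// Value 0 (RndDefault) prints nothing, so "sqrt$r.f32" becomes "sqrt.f32".
static const char *printRoundingMode(int RndMode) {
  switch (RndMode) {
  case 1:  return ".rn";      // round to nearest even
  case 2:  return ".rz";      // round toward zero
  case 3:  return ".rm";      // round toward -infinity
  case 4:  return ".rp";      // round toward +infinity
  case 5:  return ".approx";  // approximate result
  default: return "";         // RndDefault: no explicit suffix
  }
}

int main() {
  std::printf("sqrt%s.f32\n", printRoundingMode(0)); // sqrt.f32
  std::printf("sqrt%s.f32\n", printRoundingMode(1)); // sqrt.rn.f32
  return 0;
}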
+ //===- Floating-Point Instructions - 2 Operand Form -----------------------===// -multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_2OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode RegF32:$a))]>; + (ins RndMode:$r, RegF32:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins f32imm:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f32imm:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode RegF64:$a))]>; + (ins RndMode:$r, RegF64:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; def ri64 : InstPTX<(outs RegF64:$d), - (ins f64imm:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f64imm:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; } //===- Floating-Point Instructions - 3 Operand Form -----------------------===// -multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_3OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF32:$a, f32imm:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; def ri64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF64:$a, f64imm:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; } //===- Floating-Point Instructions - 4 Operand Form -----------------------===// -multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { +multiclass PTX_FLOAT_4OP<string opcstr> { def rrr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, RegF32:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - RegF32:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, f32imm:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; + def rii32 : InstPTX<(outs RegF32:$d), + (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rrr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b, RegF64:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - RegF64:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; def rri64 : InstPTX<(outs RegF64:$d), - (ins 
RegF64:$a, RegF64:$b, f64imm:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; + def rii64 : InstPTX<(outs RegF64:$d), + (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; } -multiclass INT3<string opcstr, SDNode opnode> { +//===- Integer Instructions - 3 Operand Form ------------------------------===// +multiclass PTX_INT3<string opcstr, SDNode opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), @@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } +//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===// +multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> { + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; +} + +//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===// multiclass PTX_LOGIC<string opcstr, SDNode opnode> { def ripreds : InstPTX<(outs RegPred:$d), (ins RegPred:$a, i1imm:$b), @@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } -multiclass INT3ntnc<string opcstr, SDNode opnode> { +//===- Integer Shift Instructions - 3 Operand Form ------------------------===// +multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), @@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; } +//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===// multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, CondCode cmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, def rr_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, 
".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc 
RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; } -multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, +//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===// +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls, CondCode ucmp, CondCode ocmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; + def ri_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>; + def ri_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>; + def rr_and_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_and_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_or_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_or_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_xor_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_xor_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_and_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_and_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not 
RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_or_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_or_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_xor_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_xor_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; } -multiclass PTX_SELP<RegisterClass RC, string regclsname> { +//===- Select Predicate Instructions - 4 Operand Form ---------------------===// +multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls, + SDNode immnode> { def rr : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; + def ri + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>; + def ii + : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>; } -multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { - def rr32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs RC:$d), - (ins MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs RC:$d), - (ins MEMii64:$a), - !strconcat(opstr, 
!strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; -} - -multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; - defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; -} - -multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { - def rr32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs), - (ins RC:$d, MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs), - (ins RC:$d, MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; -} -multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; -} //===----------------------------------------------------------------------===// // Instructions @@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { ///===- Integer Arithmetic Instructions -----------------------------------===// -defm ADD : INT3<"add", add>; -defm SUB : INT3<"sub", sub>; -defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies -defm DIV : INT3<"div", udiv>; -defm REM : INT3<"rem", urem>; +defm ADD : PTX_INT3<"add", add>; +defm SUB : PTX_INT3<"sub", sub>; +defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : PTX_INT3<"div", udiv>; +defm SDIV : PTX_INT3_SIGNED<"div", sdiv>; +defm REM : PTX_INT3<"rem", urem>; ///===- Floating-Point Arithmetic Instructions ----------------------------===// -// Standard Unary Operations -defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; +// FNEG +defm FNEG : PTX_FLOAT_2OP<"neg">; // Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; -defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; -defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; - -// For floating-point division: -// SM_13+ defaults to .rn for f32 and f64, -// SM10 must *not* provide a rounding - -// TODO: -// - Allow user selection of rounding modes for fdiv -// - Add support for -prec-div=false (.approx) - -def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.rn.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.rn.f32\t$d, $a, $b", - 
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - -def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri64SM13 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - - +defm FADD : PTX_FLOAT_3OP<"add">; +defm FSUB : PTX_FLOAT_3OP<"sub">; +defm FMUL : PTX_FLOAT_3OP<"mul">; +defm FDIV : PTX_FLOAT_3OP<"div">; // Multi-operation hybrid instructions +defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>; -// The selection of mad/fma is tricky. In some cases, they are the *same* -// instruction, but in other cases we may prefer one or the other. Also, -// different PTX versions differ on whether rounding mode flags are required. -// In the short term, mad is supported on all PTX versions and we use a -// default rounding mode no matter what shader model or PTX version. -// TODO: Allow the rounding mode to be selectable through llc. 
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, - Requires<[FMadNeedsRoundingMode, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, - Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// -def FSQRT32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sqrt.rn.f32\t$d, $a", - [(set RegF32:$d, (fsqrt RegF32:$a))]>; - -def FSQRT64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sqrt.rn.f64\t$d, $a", - [(set RegF64:$d, (fsqrt RegF64:$a))]>; - -def FSIN32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sin.approx.f32\t$d, $a", - [(set RegF32:$d, (fsin RegF32:$a))]>; +// SQRT +def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sqrt$r.f64\t$d, $a", []>; +def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sqrt$r.f64\t$d, $a", []>; + +// SIN +def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sin$r.f32\t$d, $a", []>; +def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sin$r.f32\t$d, $a", []>; +def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sin$r.f64\t$d, $a", []>; +def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sin$r.f64\t$d, $a", []>; + +// COS +def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "cos$r.f64\t$d, $a", []>; +def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "cos$r.f64\t$d, $a", []>; -def FSIN64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sin.approx.f64\t$d, $a", - [(set RegF64:$d, (fsin RegF64:$a))]>; -def FCOS32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "cos.approx.f32\t$d, $a", - [(set RegF32:$d, (fcos RegF32:$a))]>; - -def FCOS64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "cos.approx.f64\t$d, $a", - [(set RegF64:$d, (fcos RegF64:$a))]>; ///===- Comparison and Selection Instructions -----------------------------===// @@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; // Compare f32 -defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">; // Compare f64 -defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; -defm SETPLEf64 : 
PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">; // .selp -defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; -defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; -defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; -defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; -defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; +defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>; +defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>; +defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>; +defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>; +defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>; ///===- Logic and Shift Instructions --------------------------------------===// -defm SHL : INT3ntnc<"shl.b", PTXshl>; -defm SRL : INT3ntnc<"shr.u", PTXsrl>; -defm SRA : INT3ntnc<"shr.s", PTXsra>; +defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>; +defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>; +defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>; defm AND : PTX_LOGIC<"and", and>; defm OR : PTX_LOGIC<"or", or>; @@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>; ///===- Data Movement and Conversion Instructions -------------------------===// +// any_extend +// Implement the anyext instruction in terms of the PTX cvt instructions. +//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>; + +// bitconvert +// These instructions implement the bit-wise conversion between integer and +// floating-point types. +def MOVi32f32 + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; +def MOVf32i32 + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; +def MOVi64f64 + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; +def MOVf64i64 + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; + let neverHasSideEffects = 1 in { def MOVPREDrr : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; @@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; } -// Loads -defm LDg : PTX_LD_ALL<"ld.global", load_global>; -defm LDc : PTX_LD_ALL<"ld.const", load_constant>; -defm LDl : PTX_LD_ALL<"ld.local", load_local>; -defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; +// PTX cvt instructions +// Note all of these may actually be used, we just define all possible patterns +// here (that make sense). +// FIXME: Can we collapse this somehow into a multiclass def? 
+ +// To i16 +def CVTu16u32 + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; +def CVTu16u64 + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; +def CVTu16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u16.f32\t$d, $a", []>; +def CVTs16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s16.f32\t$d, $a", []>; +def CVTu16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u16.f64\t$d, $a", []>; +def CVTs16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s16.f64\t$d, $a", []>; + +// To i32 +def CVTu32u16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; +def CVTs32s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; +def CVTu32u64 + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; +def CVTu32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u32.f32\t$d, $a", []>; +def CVTs32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s32.f32\t$d, $a", []>; +def CVTu32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u32.f64\t$d, $a", []>; +def CVTs32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s32.f64\t$d, $a", []>; + +// To i64 +def CVTu64u16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; +def CVTs64s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; +def CVTu64u32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; +def CVTs64s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; +def CVTu64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u64.f32\t$d, $a", []>; +def CVTs64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s64.f32\t$d, $a", []>; +def CVTu64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u64.f64\t$d, $a", []>; +def CVTs64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s64.f64\t$d, $a", []>; + +// To f32 +def CVTf32u16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.u16\t$d, $a", []>; +def CVTf32s16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.s16\t$d, $a", []>; +def CVTf32u32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.u32\t$d, $a", []>; +def CVTf32s32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.s32\t$d, $a", []>; +def CVTf32u64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.u64\t$d, $a", []>; +def CVTf32s64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.s64\t$d, $a", []>; +def CVTf32f64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.f32.f64\t$d, $a", []>; + +// To f64 +def CVTf64u16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.u16\t$d, $a", []>; +def CVTf64s16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.s16\t$d, $a", []>; +def CVTf64u32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.u32\t$d, $a", []>; +def CVTf64s32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.s32\t$d, $a", []>; +def CVTf64u64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.u64\t$d, $a", []>; +def CVTf64s64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.s64\t$d, $a", []>; +def CVTf64f32 + : 
InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; + + ///===- Control Flow Instructions -----------------------------------------===// -// These instructions are used to load/store from the .param space for -// device and kernel parameters +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} -let hasSideEffects = 1 in { - def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), - "ld.param.pred\t$d, [$a]", - [(set RegPred:$d, (PTXloadparam timm:$a))]>; - def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", - [(set RegI16:$d, (PTXloadparam timm:$a))]>; - def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", - [(set RegI32:$d, (PTXloadparam timm:$a))]>; - def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", - [(set RegI64:$d, (PTXloadparam timm:$a))]>; - def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", - [(set RegF32:$d, (PTXloadparam timm:$a))]>; - def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", - [(set RegF64:$d, (PTXloadparam timm:$a))]>; - - def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), - "st.param.pred\t[$d], $a", - [(PTXstoreparam timm:$d, RegPred:$a)]>; - def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), - "st.param.u16\t[$d], $a", - [(PTXstoreparam timm:$d, RegI16:$a)]>; - def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), - "st.param.u32\t[$d], $a", - [(PTXstoreparam timm:$d, RegI32:$a)]>; - def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), - "st.param.u64\t[$d], $a", - [(PTXstoreparam timm:$d, RegI64:$a)]>; - def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), - "st.param.f32\t[$d], $a", - [(PTXstoreparam timm:$d, RegF32:$a)]>; - def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), - "st.param.f64\t[$d], $a", - [(PTXstoreparam timm:$d, RegF64:$a)]>; +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a RegPred register) here + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; } -// Stores -defm STg : PTX_ST_ALL<"st.global", store_global>; -defm STl : PTX_ST_ALL<"st.local", store_local>; -defm STs : PTX_ST_ALL<"st.shared", store_shared>; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} -// defm STp : PTX_ST_ALL<"st.param", store_parameter>; -// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; -// TODO: Do something with st.param if/when it is needed. +let hasSideEffects = 1 in { + def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>; +} -// Conversion to pred -// PTX does not directly support converting to a predicate type, so we fake it -// by performing a greater-than test between the value and zero. This follows -// the C convention that any non-zero value is equivalent to 'true'. 
-def CVT_pred_u16 - : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI16:$a))]>; +///===- Parameter Passing Pseudo-Instructions -----------------------------===// + +def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), + "mov.pred\t$a, %param$b", []>; +def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), + "mov.b16\t$a, %param$b", []>; +def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), + "mov.b32\t$a, %param$b", []>; +def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), + "mov.b64\t$a, %param$b", []>; +def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), + "mov.f32\t$a, %param$b", []>; +def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), + "mov.f64\t$a, %param$b", []>; + +def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; +def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; +def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>; +def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; +def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; +def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; -def CVT_pred_u32 - : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI32:$a))]>; -def CVT_pred_u64 - : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI64:$a))]>; +//===----------------------------------------------------------------------===// +// Instruction Selection Patterns +//===----------------------------------------------------------------------===// -def CVT_pred_f32 - : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", - [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; +// FADD +def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)), + (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)), + (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)), + (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)), + (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FSUB +def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)), + (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)), + (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)), + (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)), + (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL +def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)), + (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)), + (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)), + (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)), + (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FDIV +def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)), + (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)), + (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)), + (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)), + (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL+FADD +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)), + (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, 
RegF32:$a, RegF32:$b, fpimm:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)), + (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)), + (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>; + +// FNEG +def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>; + +// FSQRT +def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>; + +// FSIN +def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>; + +// FCOS +def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>; + +// Type conversion notes: +// - PTX does not directly support converting a predicate to a value, so we +// use a select instruction to select either 0 or 1 (integer or fp) based +// on the truth value of the predicate. +// - PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. -def CVT_pred_f64 - : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0", - [(set RegPred:$d, (fp_to_uint RegF64:$a))]>; +// Conversion to pred +def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>; +def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>; +def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>; +def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>; +def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>; // Conversion to u16 -// PTX does not directly support converting a predicate to a value, so we -// use a select instruction to select either 0 or 1 (integer or fp) based -// on the truth value of the predicate. 
-def CVT_u16_preda - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (anyext RegPred:$a))]>; - -def CVT_u16_pred - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (zext RegPred:$a))]>; - -def CVT_u16_preds - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (sext RegPred:$a))]>; - -def CVT_u16_u32 - : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", - [(set RegI16:$d, (trunc RegI32:$a))]>; - -def CVT_u16_u64 - : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", - [(set RegI16:$d, (trunc RegI64:$a))]>; - -def CVT_u16_f32 - : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u16_f64 - : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; +def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; +def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; +def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; +def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; // Conversion to u32 - -def CVT_u32_pred - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (zext RegPred:$a))]>; - -def CVT_u32_b16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (anyext RegI16:$a))]>; - -def CVT_u32_u16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (zext RegI16:$a))]>; - -def CVT_u32_preds - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (sext RegPred:$a))]>; - -def CVT_u32_s16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", - [(set RegI32:$d, (sext RegI16:$a))]>; - -def CVT_u32_u64 - : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", - [(set RegI32:$d, (trunc RegI64:$a))]>; - -def CVT_u32_f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u32_f64 - : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>; +def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; +def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; +def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; // Conversion to u64 - -def CVT_u64_pred - : InstPTX<(outs 
RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (zext RegPred:$a))]>; - -def CVT_u64_preds - : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (sext RegPred:$a))]>; - -def CVT_u64_u16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", - [(set RegI64:$d, (zext RegI16:$a))]>; - -def CVT_u64_s16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a", - [(set RegI64:$d, (sext RegI16:$a))]>; - -def CVT_u64_u32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", - [(set RegI64:$d, (zext RegI32:$a))]>; - -def CVT_u64_s32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", - [(set RegI64:$d, (sext RegI32:$a))]>; - -def CVT_u64_f32 - : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u64_f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a, + 0xFFFFFFFFFFFFFFFF, 0)>; +def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>; +def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; +def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; // Conversion to f32 - -def CVT_f32_pred - : InstPTX<(outs RegF32:$d), (ins RegPred:$a), - "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 - [(set RegF32:$d, (uint_to_fp RegPred:$a))]>; - -def CVT_f32_u16 - : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI16:$a))]>; - -def CVT_f32_u32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI32:$a))]>; - -def CVT_f32_u64 - : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI64:$a))]>; - -def CVT_f32_f64 - : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", - [(set RegF32:$d, (fround RegF64:$a))]>; +def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a, + (MOVf32i32 0x3F800000), (MOVf32i32 0))>; +def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>; +def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>; // Conversion to f64 +def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a, + (MOVf64i64 0x3F80000000000000), 
(MOVf64i64 0))>; +def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; +def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; -def CVT_f64_pred - : InstPTX<(outs RegF64:$d), (ins RegPred:$a), - "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0 - [(set RegF64:$d, (uint_to_fp RegPred:$a))]>; - -def CVT_f64_u16 - : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI16:$a))]>; - -def CVT_f64_u32 - : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI32:$a))]>; - -def CVT_f64_u64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI64:$a))]>; - -def CVT_f64_f32 - : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", - [(set RegF64:$d, (fextend RegF32:$a))]>; - -///===- Control Flow Instructions -----------------------------------------===// - -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { - def BRAd - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; -} - -let isBranch = 1, isTerminator = 1 in { - // FIXME: The pattern part is blank because I cannot (or do not yet know - // how to) use the first operand of PredicateOperand (a RegPred register) here - def BRAdp - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", - [/*(brcond pred:$_p, bb:$d)*/]>; -} - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; - def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; -} - -///===- Spill Instructions ------------------------------------------------===// -// Special instructions used for stack spilling -def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a), - "mov.u16\ts$d, $a", []>; -def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a), - "mov.u32\ts$d, $a", []>; -def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a), - "mov.u64\ts$d, $a", []>; -def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a), - "mov.f32\ts$d, $a", []>; -def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a), - "mov.f64\ts$d, $a", []>; - -def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a), - "mov.u16\t$d, s$a", []>; -def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a), - "mov.u32\t$d, s$a", []>; -def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a), - "mov.u64\t$d, s$a", []>; -def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), - "mov.f32\t$d, s$a", []>; -def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), - "mov.f64\t$d, s$a", []>; ///===- Intrinsic Instructions --------------------------------------------===// - include "PTXIntrinsicInstrInfo.td" + +///===- Load/Store Instructions -------------------------------------------===// +include "PTXInstrLoadStore.td" + diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td new file mode 100644 index 0000000..9b4f56c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td @@ -0,0 +1,278 
@@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTXStateSpace::Global;
+  return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTXStateSpace::Constant;
+  return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTXStateSpace::Shared;
+  return false;
+}]>;
+
+def store_global
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTXStateSpace::Global;
+  return false;
+}]>;
+
+def store_shared
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTXStateSpace::Shared;
+  return false;
+}]>;
+
+// Addressing modes.
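The ComplexPattern entries that follow (ADDRrr*, ADDRri*, ADDRii*, ADDRlocal*) name C++ matcher routines (SelectADDRrr, SelectADDRri, SelectADDRii, SelectADDRlocal) that the generated instruction selector calls to split an address into the two sub-operands declared in the MEM*/LOCAL* address operand classes. Those routines are not part of this hunk; they presumably live in the backend's PTXISelDAGToDAG.cpp. Purely as an illustrative C++ sketch, with the class name, signature, and logic assumed rather than taken from this commit, a reg+imm matcher of this kind could look like:

  // Hypothetical sketch of a ComplexPattern matcher for a [reg+imm] address;
  // the backend's real SelectADDRri is the authoritative version.
  bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
                                     SDValue &Offset) {
    // Fold (add base, constant) so the load/store can use a base+offset form.
    if (Addr.getOpcode() == ISD::ADD)
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
        Base   = Addr.getOperand(0);
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(),
                                           Addr.getValueType());
        return true;
      }
    return false; // not reg+imm; the ADDRrr/ADDRii alternatives handle it
  }

A matcher that returns true yields the (register, immediate) pair that the MEMri32/MEMri64 operands print as "[$reg+$imm]".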
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+  let PrintMethod = "printParamOperand";
+  let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+  let PrintMethod = "printReturnOperand";
+  let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+  : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+  : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+  : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+  : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+                  RegisterClass RC, PatFrag pat_load> {
+  def rr32 : InstPTX<(outs RC:$d),
+                     (ins MEMri32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+             Requires<[Use32BitAddresses]>;
+  def rr64 : InstPTX<(outs RC:$d),
+                     (ins MEMri64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+             Requires<[Use64BitAddresses]>;
+  def ri32 : InstPTX<(outs RC:$d),
+                     (ins MEMri32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRri32:$a))]>,
+             Requires<[Use32BitAddresses]>;
+  def ri64 : InstPTX<(outs RC:$d),
+                     (ins MEMri64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRri64:$a))]>,
+             Requires<[Use64BitAddresses]>;
+  def ii32 : InstPTX<(outs RC:$d),
+                     (ins MEMii32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRii32:$a))]>,
+             Requires<[Use32BitAddresses]>;
+  def ii64 : InstPTX<(outs RC:$d),
+                     (ins MEMii64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRii64:$a))]>,
+             Requires<[Use64BitAddresses]>;
+} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, + PatFrag pat_store> { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, + Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { + def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal32:$a))]>; + def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal64:$a))]>; + def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal32:$a)]>; + def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal64:$a)]>; +} + +multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> { + let hasSideEffects = 1 in { + def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a), + !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (PTXloadparam texternalsym:$a))]>; + def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a), + !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")), + [(PTXstoreparam texternalsym:$d, RC:$a)]>; + } +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; +} + + + +//===----------------------------------------------------------------------===// +// Instruction definitions for loads/stores +//===----------------------------------------------------------------------===// + +// Global/shared stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// Global/shared/constant loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// Param 
loads/stores +defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>; +defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>; +defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>; +defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>; +defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>; +defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>; + +// Local loads/stores +defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; +defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; +defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; +defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; +defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; +defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 8d97909..9de1cb6 100644 --- a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> // TODO Add read vector-version of special registers -//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; -def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; -def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; -def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; -def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", +// int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", + int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", + int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", + int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", + int_ptx_read_tid_w>; -//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; -def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; -def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; -def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>; -def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", +// int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", + int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", + int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", + int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", + int_ptx_read_ntid_w>; -def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; -def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; -def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", + int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", + int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", + int_ptx_read_nwarpid>; -//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; -def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>; -def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", 
int_ptx_read_ctaid_y>; -def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; -def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; +//def PTX_READ_CTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", + int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", + int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", + int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", + int_ptx_read_ctaid_w>; -//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; -def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; -def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; -def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; -def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; +//def PTX_READ_NCTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", + int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", + int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", + int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", + int_ptx_read_nctaid_w>; -def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; -def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; -def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", + int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", + int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", + int_ptx_read_gridid>; def PTX_READ_LANEMASK_EQ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp index b13a3da..468ce93 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -100,7 +100,7 @@ public: /// @{ virtual void ChangeSection(const MCSection *Section); - virtual void InitSections() {} + virtual void InitSections() { /* PTX does not use sections */ } virtual void EmitLabel(MCSymbol *Symbol); @@ -132,7 +132,9 @@ public: /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + /// @param ByteAlignment - The alignment of the common symbol in bytes. 
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); @@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - //assert(getCurrentSection() && "Cannot emit before setting section!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); @@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} -void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size, unsigned ByteAlignment) {} @@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { // If we have an AsmPrinter, use that to print, otherwise print the MCInst. if (InstPrinter) - InstPrinter->printInst(&Inst, OS); + InstPrinter->printInst(&Inst, OS, ""); else Inst.print(OS, &MAI); EmitEOL(); @@ -533,7 +536,7 @@ namespace llvm { formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, - MCCodeEmitter *CE, TargetAsmBackend *TAB, + MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, IP, CE, ShowInst); diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp new file mode 100644 index 0000000..142e639 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp @@ -0,0 +1,32 @@ +//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower PTX MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXAsmPrinter.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; + OutMI.addOperand(AP.lowerOperand(MO)); + } +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp index 6fe9e6c..b33a273 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); - - DEBUG(dbgs() - << "PTX::NoRegister == " << PTX::NoRegister << "\n" - << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); - - DEBUG(for (unsigned reg = PTX::NoRegister + 1; - reg < PTX::NUM_TARGET_REGS; ++reg) - if (MRI.isPhysRegUsed(reg)) - dbgs() << "Used Reg: " << reg << "\n";); - - // FIXME: This is a slow linear scanning - for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) - if (MRI.isPhysRegUsed(reg) && - !MFI->isRetReg(reg) && - (MFI->isKernel() || !MFI->isArgReg(reg))) - MFI->addLocalVarReg(reg); - - // Notify MachineFunctionInfo that I've done adding local var reg - MFI->doneAddLocalVar(); - - DEBUG(for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(); - i != e; ++i) - dbgs() << "Arg Reg: " << *i << "\n";); - - DEBUG(for (PTXMachineFunctionInfo::reg_iterator - i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) - dbgs() << "Local Var Reg: " << *i << "\n";); + // Generate list of all virtual registers used in this function + for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + MFI->addVirtualRegister(TRC, Reg); + } return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h index 9d65f5b..3b985f7 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,75 +15,148 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { + /// PTXMachineFunctionInfo - This class is derived from MachineFunction and /// contains private PTX target-specific information for each MachineFunction. 
/// class PTXMachineFunctionInfo : public MachineFunctionInfo { private: - bool is_kernel; - std::vector<unsigned> reg_arg, reg_local_var; - std::vector<unsigned> reg_ret; - bool _isDoneAddArg; + bool IsKernel; + DenseSet<unsigned> RegArgs; + DenseSet<unsigned> RegRets; + + typedef std::vector<unsigned> RegisterList; + typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap; + typedef DenseMap<unsigned, std::string> RegisterNameMap; + typedef DenseMap<int, std::string> FrameMap; + + RegisterMap UsedRegs; + RegisterNameMap RegNames; + FrameMap FrameSymbols; + + PTXParamManager ParamManager; public: + typedef DenseSet<unsigned>::const_iterator reg_iterator; + PTXMachineFunctionInfo(MachineFunction &MF) - : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) { - reg_arg.reserve(8); - reg_local_var.reserve(32); + : IsKernel(false) { + UsedRegs[PTX::RegPredRegisterClass] = RegisterList(); + UsedRegs[PTX::RegI16RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI64RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF64RegisterClass] = RegisterList(); } - void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; } - - void addArgReg(unsigned reg) { reg_arg.push_back(reg); } - void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void addRetReg(unsigned reg) { - if (!isRetReg(reg)) { - reg_ret.push_back(reg); + /// getParamManager - Returns the PTXParamManager instance for this function. + PTXParamManager& getParamManager() { return ParamManager; } + const PTXParamManager& getParamManager() const { return ParamManager; } + + /// setKernel/isKernel - Gets/sets a flag that indicates if this function is + /// a PTX kernel function. + void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; } + bool isKernel() const { return IsKernel; } + + /// argreg_begin/argreg_end - Returns iterators to the set of registers + /// containing function arguments. + reg_iterator argreg_begin() const { return RegArgs.begin(); } + reg_iterator argreg_end() const { return RegArgs.end(); } + + /// retreg_begin/retreg_end - Returns iterators to the set of registers + /// containing the function return values. + reg_iterator retreg_begin() const { return RegRets.begin(); } + reg_iterator retreg_end() const { return RegRets.end(); } + + /// addRetReg - Adds a register to the set of return-value registers. + void addRetReg(unsigned Reg) { + if (!RegRets.count(Reg)) { + RegRets.insert(Reg); + std::string name; + name = "%ret"; + name += utostr(RegRets.size() - 1); + RegNames[Reg] = name; } } - void doneAddArg(void) { - _isDoneAddArg = true; + /// addArgReg - Adds a register to the set of function argument registers. 
+ void addArgReg(unsigned Reg) { + RegArgs.insert(Reg); + std::string name; + name = "%param"; + name += utostr(RegArgs.size() - 1); + RegNames[Reg] = name; } - void doneAddLocalVar(void) {} - - bool isKernel() const { return is_kernel; } - typedef std::vector<unsigned>::const_iterator reg_iterator; - typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; - typedef std::vector<unsigned>::const_iterator ret_iterator; - - bool argRegEmpty() const { return reg_arg.empty(); } - int getNumArg() const { return reg_arg.size(); } - reg_iterator argRegBegin() const { return reg_arg.begin(); } - reg_iterator argRegEnd() const { return reg_arg.end(); } - reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } - reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } - - bool localVarRegEmpty() const { return reg_local_var.empty(); } - reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } - reg_iterator localVarRegEnd() const { return reg_local_var.end(); } - - bool retRegEmpty() const { return reg_ret.empty(); } - int getNumRet() const { return reg_ret.size(); } - ret_iterator retRegBegin() const { return reg_ret.begin(); } - ret_iterator retRegEnd() const { return reg_ret.end(); } + /// addVirtualRegister - Adds a virtual register to the set of all used + /// registers in the function. + void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) { + std::string name; + + // Do not count registers that are argument/return registers. + if (!RegRets.count(Reg) && !RegArgs.count(Reg)) { + UsedRegs[TRC].push_back(Reg); + if (TRC == PTX::RegPredRegisterClass) + name = "%p"; + else if (TRC == PTX::RegI16RegisterClass) + name = "%rh"; + else if (TRC == PTX::RegI32RegisterClass) + name = "%r"; + else if (TRC == PTX::RegI64RegisterClass) + name = "%rd"; + else if (TRC == PTX::RegF32RegisterClass) + name = "%f"; + else if (TRC == PTX::RegF64RegisterClass) + name = "%fd"; + else + llvm_unreachable("Invalid register class"); + + name += utostr(UsedRegs[TRC].size() - 1); + RegNames[Reg] = name; + } + } - bool isArgReg(unsigned reg) const { - return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); + /// getRegisterName - Returns the name of the specified virtual register. This + /// name is used during PTX emission. + const char *getRegisterName(unsigned Reg) const { + if (RegNames.count(Reg)) + return RegNames.find(Reg)->second.c_str(); + else if (Reg == PTX::NoRegister) + return "%noreg"; + else + llvm_unreachable("Register not in register name map"); } - bool isRetReg(unsigned reg) const { - return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + /// getNumRegistersForClass - Returns the number of virtual registers that are + /// used for the specified register class. + unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const { + return UsedRegs.lookup(TRC).size(); } - bool isLocalVarReg(unsigned reg) const { - return std::find(reg_local_var.begin(), reg_local_var.end(), reg) - != reg_local_var.end(); + /// getFrameSymbol - Returns the symbol name for the given FrameIndex. + const char* getFrameSymbol(int FrameIndex) { + if (FrameSymbols.count(FrameIndex)) { + return FrameSymbols.lookup(FrameIndex).c_str(); + } else { + std::string Name = "__local"; + Name += utostr(FrameIndex); + // The whole point of caching this name is to ensure the pointer we pass + // to any getExternalSymbol() calls will remain valid for the lifetime of + // the back-end instance. 
This is to work around an issue in SelectionDAG + // where symbol names are expected to be life-long strings. + FrameSymbols[FrameIndex] = Name; + return FrameSymbols[FrameIndex].c_str(); + } } }; // class PTXMachineFunctionInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp new file mode 100644 index 0000000..7753787 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp @@ -0,0 +1,73 @@ +//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXParamManager class. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXParamManager.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +PTXParamManager::PTXParamManager() { +} + +unsigned PTXParamManager::addArgumentParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_ARGUMENT; + Param.Size = Size; + + std::string Name; + Name = "__param_"; + Name += utostr(ArgumentParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ArgumentParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addReturnParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_RETURN; + Param.Size = Size; + + std::string Name; + Name = "__ret_"; + Name += utostr(ReturnParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ReturnParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addLocalParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_LOCAL; + Param.Size = Size; + + std::string Name; + Name = "__localparam_"; + Name += utostr(LocalParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + LocalParams.push_back(Index); + + return Index; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h new file mode 100644 index 0000000..9fd2de5 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h @@ -0,0 +1,86 @@ +//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTXParamManager class, which manages all defined .param +// variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_PARAM_MANAGER_H +#define PTX_PARAM_MANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// PTXParamManager - This class manages all .param variables defined for a +/// particular function. 
+class PTXParamManager { +private: + + /// PTXParamType - Type of a .param variable + enum PTXParamType { + PTX_PARAM_TYPE_ARGUMENT, + PTX_PARAM_TYPE_RETURN, + PTX_PARAM_TYPE_LOCAL + }; + + /// PTXParam - Definition of a PTX .param variable + struct PTXParam { + PTXParamType Type; + unsigned Size; + std::string Name; + }; + + DenseMap<unsigned, PTXParam> AllParams; + SmallVector<unsigned, 4> ArgumentParams; + SmallVector<unsigned, 4> ReturnParams; + SmallVector<unsigned, 4> LocalParams; + +public: + + typedef SmallVector<unsigned, 4>::const_iterator param_iterator; + + PTXParamManager(); + + param_iterator arg_begin() const { return ArgumentParams.begin(); } + param_iterator arg_end() const { return ArgumentParams.end(); } + param_iterator ret_begin() const { return ReturnParams.begin(); } + param_iterator ret_end() const { return ReturnParams.end(); } + param_iterator local_begin() const { return LocalParams.begin(); } + param_iterator local_end() const { return LocalParams.end(); } + + /// addArgumentParam - Returns a new .param used as an argument. + unsigned addArgumentParam(unsigned Size); + + /// addReturnParam - Returns a new .param used as a return argument. + unsigned addReturnParam(unsigned Size); + + /// addLocalParam - Returns a new .param used as a local .param variable. + unsigned addLocalParam(unsigned Size); + + /// getParamName - Returns the name of the parameter as a string. + const std::string &getParamName(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Name; + } + + /// getParamSize - Returns the size of the parameter in bits. + unsigned getParamSize(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Size; + } + +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp new file mode 100644 index 0000000..2d2d5c3 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp @@ -0,0 +1,58 @@ +//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a register allocator for PTX code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-reg-alloc" + +#include "PTX.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocRegistry.h" + +using namespace llvm; + +namespace { + // Special register allocator for PTX. + class PTXRegAlloc : public MachineFunctionPass { + public: + static char ID; + PTXRegAlloc() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual const char* getPassName() const { + return "PTX Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + // We do not actually do anything (at least not yet). 
+ return false; + } + }; + + char PTXRegAlloc::ID = 0; + + static RegisterRegAlloc + ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator); +} + +FunctionPass *llvm::createPTXRegisterAllocator() { + return new PTXRegAlloc(); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp index cb56ea9..c806266 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp @@ -14,6 +14,9 @@ #include "PTX.h" #include "PTXRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,15 +26,23 @@ using namespace llvm; PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) - : PTXGenRegisterInfo() { + const TargetInstrInfo &tii) + // PTX does not have a return address register. + : PTXGenRegisterInfo(0), TII(tii) { } void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { unsigned Index; - MachineInstr& MI = *II; + MachineInstr &MI = *II; + //MachineBasicBlock &MBB = *MI.getParent(); + //DebugLoc dl = MI.getDebugLoc(); + //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + + //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass); + + llvm_unreachable("FrameIndex should have been previously eliminated!"); Index = 0; while (!MI.getOperand(Index).isFI()) { @@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32)) + //.addReg(Reg, RegState::Define).addImm(FrameIndex); + //if (MI2->findFirstPredOperandIdx() == -1) { + // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + //} + //MI2->dump(); + + //MachineOperand ESOp = MachineOperand::CreateES("__local__"); + // This frame index is post stack slot re-use assignments + //MI.getOperand(Index).ChangeToRegister(Reg, false); MI.getOperand(Index).ChangeToImmediate(FrameIndex); + //MI.getOperand(Index) = ESOp; } diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h index 0b63cb6..55fafe4 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h @@ -25,8 +25,12 @@ class PTXTargetMachine; class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { +private: + const TargetInstrInfo &TII; + +public: PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII); + const TargetInstrInfo &tii); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { @@ -47,18 +51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { llvm_unreachable("PTX does not have a frame register"); return 0; } - - virtual unsigned getRARegister() const { - llvm_unreachable("PTX does not have a return address register"); - return 0; - } - - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); - } - virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); - } 
}; // struct PTXRegisterInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td index 1313d24..6ed6d3f 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td @@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> { // Registers //===----------------------------------------------------------------------===// -///===- Predicate Registers -----------------------------------------------===// - -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; -def P8 : PTXReg<"p8">; -def P9 : PTXReg<"p9">; -def P10 : PTXReg<"p10">; -def P11 : PTXReg<"p11">; -def P12 : PTXReg<"p12">; -def P13 : PTXReg<"p13">; -def P14 : PTXReg<"p14">; -def P15 : PTXReg<"p15">; -def P16 : PTXReg<"p16">; -def P17 : PTXReg<"p17">; -def P18 : PTXReg<"p18">; -def P19 : PTXReg<"p19">; -def P20 : PTXReg<"p20">; -def P21 : PTXReg<"p21">; -def P22 : PTXReg<"p22">; -def P23 : PTXReg<"p23">; -def P24 : PTXReg<"p24">; -def P25 : PTXReg<"p25">; -def P26 : PTXReg<"p26">; -def P27 : PTXReg<"p27">; -def P28 : PTXReg<"p28">; -def P29 : PTXReg<"p29">; -def P30 : PTXReg<"p30">; -def P31 : PTXReg<"p31">; -def P32 : PTXReg<"p32">; -def P33 : PTXReg<"p33">; -def P34 : PTXReg<"p34">; -def P35 : PTXReg<"p35">; -def P36 : PTXReg<"p36">; -def P37 : PTXReg<"p37">; -def P38 : PTXReg<"p38">; -def P39 : PTXReg<"p39">; -def P40 : PTXReg<"p40">; -def P41 : PTXReg<"p41">; -def P42 : PTXReg<"p42">; -def P43 : PTXReg<"p43">; -def P44 : PTXReg<"p44">; -def P45 : PTXReg<"p45">; -def P46 : PTXReg<"p46">; -def P47 : PTXReg<"p47">; -def P48 : PTXReg<"p48">; -def P49 : PTXReg<"p49">; -def P50 : PTXReg<"p50">; -def P51 : PTXReg<"p51">; -def P52 : PTXReg<"p52">; -def P53 : PTXReg<"p53">; -def P54 : PTXReg<"p54">; -def P55 : PTXReg<"p55">; -def P56 : PTXReg<"p56">; -def P57 : PTXReg<"p57">; -def P58 : PTXReg<"p58">; -def P59 : PTXReg<"p59">; -def P60 : PTXReg<"p60">; -def P61 : PTXReg<"p61">; -def P62 : PTXReg<"p62">; -def P63 : PTXReg<"p63">; -def P64 : PTXReg<"p64">; -def P65 : PTXReg<"p65">; -def P66 : PTXReg<"p66">; -def P67 : PTXReg<"p67">; -def P68 : PTXReg<"p68">; -def P69 : PTXReg<"p69">; -def P70 : PTXReg<"p70">; -def P71 : PTXReg<"p71">; -def P72 : PTXReg<"p72">; -def P73 : PTXReg<"p73">; -def P74 : PTXReg<"p74">; -def P75 : PTXReg<"p75">; -def P76 : PTXReg<"p76">; -def P77 : PTXReg<"p77">; -def P78 : PTXReg<"p78">; -def P79 : PTXReg<"p79">; -def P80 : PTXReg<"p80">; -def P81 : PTXReg<"p81">; -def P82 : PTXReg<"p82">; -def P83 : PTXReg<"p83">; -def P84 : PTXReg<"p84">; -def P85 : PTXReg<"p85">; -def P86 : PTXReg<"p86">; -def P87 : PTXReg<"p87">; -def P88 : PTXReg<"p88">; -def P89 : PTXReg<"p89">; -def P90 : PTXReg<"p90">; -def P91 : PTXReg<"p91">; -def P92 : PTXReg<"p92">; -def P93 : PTXReg<"p93">; -def P94 : PTXReg<"p94">; -def P95 : PTXReg<"p95">; -def P96 : PTXReg<"p96">; -def P97 : PTXReg<"p97">; -def P98 : PTXReg<"p98">; -def P99 : PTXReg<"p99">; -def P100 : PTXReg<"p100">; -def P101 : PTXReg<"p101">; -def P102 : PTXReg<"p102">; -def P103 : PTXReg<"p103">; -def P104 : PTXReg<"p104">; -def P105 : PTXReg<"p105">; -def P106 : PTXReg<"p106">; -def P107 : PTXReg<"p107">; -def P108 : PTXReg<"p108">; -def P109 : PTXReg<"p109">; -def P110 : PTXReg<"p110">; -def P111 : PTXReg<"p111">; -def P112 : PTXReg<"p112">; -def P113 : PTXReg<"p113">; -def P114 : PTXReg<"p114">; -def P115 : 
PTXReg<"p115">; -def P116 : PTXReg<"p116">; -def P117 : PTXReg<"p117">; -def P118 : PTXReg<"p118">; -def P119 : PTXReg<"p119">; -def P120 : PTXReg<"p120">; -def P121 : PTXReg<"p121">; -def P122 : PTXReg<"p122">; -def P123 : PTXReg<"p123">; -def P124 : PTXReg<"p124">; -def P125 : PTXReg<"p125">; -def P126 : PTXReg<"p126">; -def P127 : PTXReg<"p127">; - -///===- 16-Bit Registers --------------------------------------------------===// - -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; -def RH8 : PTXReg<"rh8">; -def RH9 : PTXReg<"rh9">; -def RH10 : PTXReg<"rh10">; -def RH11 : PTXReg<"rh11">; -def RH12 : PTXReg<"rh12">; -def RH13 : PTXReg<"rh13">; -def RH14 : PTXReg<"rh14">; -def RH15 : PTXReg<"rh15">; -def RH16 : PTXReg<"rh16">; -def RH17 : PTXReg<"rh17">; -def RH18 : PTXReg<"rh18">; -def RH19 : PTXReg<"rh19">; -def RH20 : PTXReg<"rh20">; -def RH21 : PTXReg<"rh21">; -def RH22 : PTXReg<"rh22">; -def RH23 : PTXReg<"rh23">; -def RH24 : PTXReg<"rh24">; -def RH25 : PTXReg<"rh25">; -def RH26 : PTXReg<"rh26">; -def RH27 : PTXReg<"rh27">; -def RH28 : PTXReg<"rh28">; -def RH29 : PTXReg<"rh29">; -def RH30 : PTXReg<"rh30">; -def RH31 : PTXReg<"rh31">; -def RH32 : PTXReg<"rh32">; -def RH33 : PTXReg<"rh33">; -def RH34 : PTXReg<"rh34">; -def RH35 : PTXReg<"rh35">; -def RH36 : PTXReg<"rh36">; -def RH37 : PTXReg<"rh37">; -def RH38 : PTXReg<"rh38">; -def RH39 : PTXReg<"rh39">; -def RH40 : PTXReg<"rh40">; -def RH41 : PTXReg<"rh41">; -def RH42 : PTXReg<"rh42">; -def RH43 : PTXReg<"rh43">; -def RH44 : PTXReg<"rh44">; -def RH45 : PTXReg<"rh45">; -def RH46 : PTXReg<"rh46">; -def RH47 : PTXReg<"rh47">; -def RH48 : PTXReg<"rh48">; -def RH49 : PTXReg<"rh49">; -def RH50 : PTXReg<"rh50">; -def RH51 : PTXReg<"rh51">; -def RH52 : PTXReg<"rh52">; -def RH53 : PTXReg<"rh53">; -def RH54 : PTXReg<"rh54">; -def RH55 : PTXReg<"rh55">; -def RH56 : PTXReg<"rh56">; -def RH57 : PTXReg<"rh57">; -def RH58 : PTXReg<"rh58">; -def RH59 : PTXReg<"rh59">; -def RH60 : PTXReg<"rh60">; -def RH61 : PTXReg<"rh61">; -def RH62 : PTXReg<"rh62">; -def RH63 : PTXReg<"rh63">; -def RH64 : PTXReg<"rh64">; -def RH65 : PTXReg<"rh65">; -def RH66 : PTXReg<"rh66">; -def RH67 : PTXReg<"rh67">; -def RH68 : PTXReg<"rh68">; -def RH69 : PTXReg<"rh69">; -def RH70 : PTXReg<"rh70">; -def RH71 : PTXReg<"rh71">; -def RH72 : PTXReg<"rh72">; -def RH73 : PTXReg<"rh73">; -def RH74 : PTXReg<"rh74">; -def RH75 : PTXReg<"rh75">; -def RH76 : PTXReg<"rh76">; -def RH77 : PTXReg<"rh77">; -def RH78 : PTXReg<"rh78">; -def RH79 : PTXReg<"rh79">; -def RH80 : PTXReg<"rh80">; -def RH81 : PTXReg<"rh81">; -def RH82 : PTXReg<"rh82">; -def RH83 : PTXReg<"rh83">; -def RH84 : PTXReg<"rh84">; -def RH85 : PTXReg<"rh85">; -def RH86 : PTXReg<"rh86">; -def RH87 : PTXReg<"rh87">; -def RH88 : PTXReg<"rh88">; -def RH89 : PTXReg<"rh89">; -def RH90 : PTXReg<"rh90">; -def RH91 : PTXReg<"rh91">; -def RH92 : PTXReg<"rh92">; -def RH93 : PTXReg<"rh93">; -def RH94 : PTXReg<"rh94">; -def RH95 : PTXReg<"rh95">; -def RH96 : PTXReg<"rh96">; -def RH97 : PTXReg<"rh97">; -def RH98 : PTXReg<"rh98">; -def RH99 : PTXReg<"rh99">; -def RH100 : PTXReg<"rh100">; -def RH101 : PTXReg<"rh101">; -def RH102 : PTXReg<"rh102">; -def RH103 : PTXReg<"rh103">; -def RH104 : PTXReg<"rh104">; -def RH105 : PTXReg<"rh105">; -def RH106 : PTXReg<"rh106">; -def RH107 : PTXReg<"rh107">; -def RH108 : PTXReg<"rh108">; -def RH109 : PTXReg<"rh109">; -def RH110 : PTXReg<"rh110">; -def 
RH111 : PTXReg<"rh111">; -def RH112 : PTXReg<"rh112">; -def RH113 : PTXReg<"rh113">; -def RH114 : PTXReg<"rh114">; -def RH115 : PTXReg<"rh115">; -def RH116 : PTXReg<"rh116">; -def RH117 : PTXReg<"rh117">; -def RH118 : PTXReg<"rh118">; -def RH119 : PTXReg<"rh119">; -def RH120 : PTXReg<"rh120">; -def RH121 : PTXReg<"rh121">; -def RH122 : PTXReg<"rh122">; -def RH123 : PTXReg<"rh123">; -def RH124 : PTXReg<"rh124">; -def RH125 : PTXReg<"rh125">; -def RH126 : PTXReg<"rh126">; -def RH127 : PTXReg<"rh127">; - -///===- 32-Bit Registers --------------------------------------------------===// - -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; -def R8 : PTXReg<"r8">; -def R9 : PTXReg<"r9">; -def R10 : PTXReg<"r10">; -def R11 : PTXReg<"r11">; -def R12 : PTXReg<"r12">; -def R13 : PTXReg<"r13">; -def R14 : PTXReg<"r14">; -def R15 : PTXReg<"r15">; -def R16 : PTXReg<"r16">; -def R17 : PTXReg<"r17">; -def R18 : PTXReg<"r18">; -def R19 : PTXReg<"r19">; -def R20 : PTXReg<"r20">; -def R21 : PTXReg<"r21">; -def R22 : PTXReg<"r22">; -def R23 : PTXReg<"r23">; -def R24 : PTXReg<"r24">; -def R25 : PTXReg<"r25">; -def R26 : PTXReg<"r26">; -def R27 : PTXReg<"r27">; -def R28 : PTXReg<"r28">; -def R29 : PTXReg<"r29">; -def R30 : PTXReg<"r30">; -def R31 : PTXReg<"r31">; -def R32 : PTXReg<"r32">; -def R33 : PTXReg<"r33">; -def R34 : PTXReg<"r34">; -def R35 : PTXReg<"r35">; -def R36 : PTXReg<"r36">; -def R37 : PTXReg<"r37">; -def R38 : PTXReg<"r38">; -def R39 : PTXReg<"r39">; -def R40 : PTXReg<"r40">; -def R41 : PTXReg<"r41">; -def R42 : PTXReg<"r42">; -def R43 : PTXReg<"r43">; -def R44 : PTXReg<"r44">; -def R45 : PTXReg<"r45">; -def R46 : PTXReg<"r46">; -def R47 : PTXReg<"r47">; -def R48 : PTXReg<"r48">; -def R49 : PTXReg<"r49">; -def R50 : PTXReg<"r50">; -def R51 : PTXReg<"r51">; -def R52 : PTXReg<"r52">; -def R53 : PTXReg<"r53">; -def R54 : PTXReg<"r54">; -def R55 : PTXReg<"r55">; -def R56 : PTXReg<"r56">; -def R57 : PTXReg<"r57">; -def R58 : PTXReg<"r58">; -def R59 : PTXReg<"r59">; -def R60 : PTXReg<"r60">; -def R61 : PTXReg<"r61">; -def R62 : PTXReg<"r62">; -def R63 : PTXReg<"r63">; -def R64 : PTXReg<"r64">; -def R65 : PTXReg<"r65">; -def R66 : PTXReg<"r66">; -def R67 : PTXReg<"r67">; -def R68 : PTXReg<"r68">; -def R69 : PTXReg<"r69">; -def R70 : PTXReg<"r70">; -def R71 : PTXReg<"r71">; -def R72 : PTXReg<"r72">; -def R73 : PTXReg<"r73">; -def R74 : PTXReg<"r74">; -def R75 : PTXReg<"r75">; -def R76 : PTXReg<"r76">; -def R77 : PTXReg<"r77">; -def R78 : PTXReg<"r78">; -def R79 : PTXReg<"r79">; -def R80 : PTXReg<"r80">; -def R81 : PTXReg<"r81">; -def R82 : PTXReg<"r82">; -def R83 : PTXReg<"r83">; -def R84 : PTXReg<"r84">; -def R85 : PTXReg<"r85">; -def R86 : PTXReg<"r86">; -def R87 : PTXReg<"r87">; -def R88 : PTXReg<"r88">; -def R89 : PTXReg<"r89">; -def R90 : PTXReg<"r90">; -def R91 : PTXReg<"r91">; -def R92 : PTXReg<"r92">; -def R93 : PTXReg<"r93">; -def R94 : PTXReg<"r94">; -def R95 : PTXReg<"r95">; -def R96 : PTXReg<"r96">; -def R97 : PTXReg<"r97">; -def R98 : PTXReg<"r98">; -def R99 : PTXReg<"r99">; -def R100 : PTXReg<"r100">; -def R101 : PTXReg<"r101">; -def R102 : PTXReg<"r102">; -def R103 : PTXReg<"r103">; -def R104 : PTXReg<"r104">; -def R105 : PTXReg<"r105">; -def R106 : PTXReg<"r106">; -def R107 : PTXReg<"r107">; -def R108 : PTXReg<"r108">; -def R109 : PTXReg<"r109">; -def R110 : PTXReg<"r110">; -def R111 : PTXReg<"r111">; -def R112 : PTXReg<"r112">; -def R113 : 
PTXReg<"r113">; -def R114 : PTXReg<"r114">; -def R115 : PTXReg<"r115">; -def R116 : PTXReg<"r116">; -def R117 : PTXReg<"r117">; -def R118 : PTXReg<"r118">; -def R119 : PTXReg<"r119">; -def R120 : PTXReg<"r120">; -def R121 : PTXReg<"r121">; -def R122 : PTXReg<"r122">; -def R123 : PTXReg<"r123">; -def R124 : PTXReg<"r124">; -def R125 : PTXReg<"r125">; -def R126 : PTXReg<"r126">; -def R127 : PTXReg<"r127">; - -///===- 64-Bit Registers --------------------------------------------------===// - -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; -def RD8 : PTXReg<"rd8">; -def RD9 : PTXReg<"rd9">; -def RD10 : PTXReg<"rd10">; -def RD11 : PTXReg<"rd11">; -def RD12 : PTXReg<"rd12">; -def RD13 : PTXReg<"rd13">; -def RD14 : PTXReg<"rd14">; -def RD15 : PTXReg<"rd15">; -def RD16 : PTXReg<"rd16">; -def RD17 : PTXReg<"rd17">; -def RD18 : PTXReg<"rd18">; -def RD19 : PTXReg<"rd19">; -def RD20 : PTXReg<"rd20">; -def RD21 : PTXReg<"rd21">; -def RD22 : PTXReg<"rd22">; -def RD23 : PTXReg<"rd23">; -def RD24 : PTXReg<"rd24">; -def RD25 : PTXReg<"rd25">; -def RD26 : PTXReg<"rd26">; -def RD27 : PTXReg<"rd27">; -def RD28 : PTXReg<"rd28">; -def RD29 : PTXReg<"rd29">; -def RD30 : PTXReg<"rd30">; -def RD31 : PTXReg<"rd31">; -def RD32 : PTXReg<"rd32">; -def RD33 : PTXReg<"rd33">; -def RD34 : PTXReg<"rd34">; -def RD35 : PTXReg<"rd35">; -def RD36 : PTXReg<"rd36">; -def RD37 : PTXReg<"rd37">; -def RD38 : PTXReg<"rd38">; -def RD39 : PTXReg<"rd39">; -def RD40 : PTXReg<"rd40">; -def RD41 : PTXReg<"rd41">; -def RD42 : PTXReg<"rd42">; -def RD43 : PTXReg<"rd43">; -def RD44 : PTXReg<"rd44">; -def RD45 : PTXReg<"rd45">; -def RD46 : PTXReg<"rd46">; -def RD47 : PTXReg<"rd47">; -def RD48 : PTXReg<"rd48">; -def RD49 : PTXReg<"rd49">; -def RD50 : PTXReg<"rd50">; -def RD51 : PTXReg<"rd51">; -def RD52 : PTXReg<"rd52">; -def RD53 : PTXReg<"rd53">; -def RD54 : PTXReg<"rd54">; -def RD55 : PTXReg<"rd55">; -def RD56 : PTXReg<"rd56">; -def RD57 : PTXReg<"rd57">; -def RD58 : PTXReg<"rd58">; -def RD59 : PTXReg<"rd59">; -def RD60 : PTXReg<"rd60">; -def RD61 : PTXReg<"rd61">; -def RD62 : PTXReg<"rd62">; -def RD63 : PTXReg<"rd63">; -def RD64 : PTXReg<"rd64">; -def RD65 : PTXReg<"rd65">; -def RD66 : PTXReg<"rd66">; -def RD67 : PTXReg<"rd67">; -def RD68 : PTXReg<"rd68">; -def RD69 : PTXReg<"rd69">; -def RD70 : PTXReg<"rd70">; -def RD71 : PTXReg<"rd71">; -def RD72 : PTXReg<"rd72">; -def RD73 : PTXReg<"rd73">; -def RD74 : PTXReg<"rd74">; -def RD75 : PTXReg<"rd75">; -def RD76 : PTXReg<"rd76">; -def RD77 : PTXReg<"rd77">; -def RD78 : PTXReg<"rd78">; -def RD79 : PTXReg<"rd79">; -def RD80 : PTXReg<"rd80">; -def RD81 : PTXReg<"rd81">; -def RD82 : PTXReg<"rd82">; -def RD83 : PTXReg<"rd83">; -def RD84 : PTXReg<"rd84">; -def RD85 : PTXReg<"rd85">; -def RD86 : PTXReg<"rd86">; -def RD87 : PTXReg<"rd87">; -def RD88 : PTXReg<"rd88">; -def RD89 : PTXReg<"rd89">; -def RD90 : PTXReg<"rd90">; -def RD91 : PTXReg<"rd91">; -def RD92 : PTXReg<"rd92">; -def RD93 : PTXReg<"rd93">; -def RD94 : PTXReg<"rd94">; -def RD95 : PTXReg<"rd95">; -def RD96 : PTXReg<"rd96">; -def RD97 : PTXReg<"rd97">; -def RD98 : PTXReg<"rd98">; -def RD99 : PTXReg<"rd99">; -def RD100 : PTXReg<"rd100">; -def RD101 : PTXReg<"rd101">; -def RD102 : PTXReg<"rd102">; -def RD103 : PTXReg<"rd103">; -def RD104 : PTXReg<"rd104">; -def RD105 : PTXReg<"rd105">; -def RD106 : PTXReg<"rd106">; -def RD107 : PTXReg<"rd107">; -def RD108 : PTXReg<"rd108">; -def RD109 
: PTXReg<"rd109">; -def RD110 : PTXReg<"rd110">; -def RD111 : PTXReg<"rd111">; -def RD112 : PTXReg<"rd112">; -def RD113 : PTXReg<"rd113">; -def RD114 : PTXReg<"rd114">; -def RD115 : PTXReg<"rd115">; -def RD116 : PTXReg<"rd116">; -def RD117 : PTXReg<"rd117">; -def RD118 : PTXReg<"rd118">; -def RD119 : PTXReg<"rd119">; -def RD120 : PTXReg<"rd120">; -def RD121 : PTXReg<"rd121">; -def RD122 : PTXReg<"rd122">; -def RD123 : PTXReg<"rd123">; -def RD124 : PTXReg<"rd124">; -def RD125 : PTXReg<"rd125">; -def RD126 : PTXReg<"rd126">; -def RD127 : PTXReg<"rd127">; +// The generated register info code throws warnings for empty register classes +// (e.g. zero-length arrays), so we use a dummy register here just to prevent +// these warnings. +def DUMMY_REG : PTXReg<"R0">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; +def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp new file mode 100644 index 0000000..50ef14a --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -0,0 +1,149 @@ +//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. 
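// Editorial sketch (not part of the patch): the copy-size split performed by
// the lowering below, shown as plain arithmetic. The helper name is
// hypothetical; the shift/mask match the NumMemOps/BytesLeft computation
// that follows.
#include <cstdint>

static void splitCopySize(uint64_t SizeVal,
                          unsigned &NumWordOps, unsigned &TailBytes) {
  NumWordOps = unsigned(SizeVal >> 2); // emitted as i32 load/store pairs
  TailBytes  = unsigned(SizeVal & 3);  // finished with one i16 and/or i8 op
}
// e.g. SizeVal = 23 -> 5 word operations plus a 2-byte and a 1-byte access.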
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + // Always inline memcpys. In PTX, we do not have a C library that provides + // a memcpy function. + //if (!AlwaysInline) + // return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), isVolatile, + false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), + isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + llvm_unreachable("memset lowering not implemented for PTX yet"); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h new file mode 100644 index 0000000..e0c7167 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h @@ -0,0 +1,53 @@ +//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTX subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXSELECTIONDAGINFO_H +#define PTXSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. +/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. +class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can + /// make the right decision when generating code for different targets. + const PTXSubtarget *Subtarget; + +public: + explicit PTXSelectionDAGInfo(const TargetMachine &TM); + ~PTXSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const; +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp index 8ec646e..1eb57d2 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp @@ -14,7 +14,7 @@ #include "PTXSubtarget.h" #include "PTX.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h index 0921f1f..b946d7c 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h @@ -114,7 +114,16 @@ class StringRef; (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); } - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool callsAreHandled() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool emitPtrAttribute() const { + return PTXVersion >= PTX_VERSION_2_2; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; // class PTXSubtarget } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp index ab926e0..449a3d9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -14,8 +14,32 @@ #include "PTX.h" #include "PTXTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" #include 
"llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + using namespace llvm; @@ -25,7 +49,7 @@ namespace llvm { bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, - TargetAsmBackend *TAB, + MCAsmBackend *MAB, bool ShowInst); } @@ -43,34 +67,47 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + + // Copied from LLVMTargetMachine.cpp + void printNoVerify(PassManagerBase &PM, const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + } + + void printAndVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + //if (VerifyMachineCode) + // PM.add(createMachineVerifierPass(Banner)); + } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS, + StringRef TT, StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), + TSInfo(*this), TLInfo(*this) { } -PTX32TargetMachine::PTX32TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, false) { +PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { } -PTX64TargetMachine::PTX64TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, true) { +PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, @@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // PTXMFInfoExtract must after register allocation! + //PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; +} + +bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // This is mostly based on LLVMTargetMachine::addPassesToEmitFile + + // Add common CodeGen passes. 
+ MCContext *Context = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + return true; + assert(Context != 0 && "Failed to get MCContext"); + + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + OwningPtr<MCStreamer> AsmStreamer; + + switch (FileType) { + default: return true; + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = 0; + MCAsmBackend *MAB = 0; + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + true, /* verbose asm */ + hasMCUseLoc(), + hasMCUseCFI(), + InstPrinter, + MCE, MAB, + false /* show MC encoding */); + AsmStreamer.reset(S); + break; + } + case CGFT_ObjectFile: { + llvm_unreachable("Object file emission is not supported with PTX"); + } + case CGFT_Null: + // The Null output is intended for use for performance analysis and testing, + // not real users. + AsmStreamer.reset(createNullStreamer(*Context)); + break; + } + + // MC Logging + //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + + PM.add(createGCInfoDeleter()); + return false; +} + +bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DisableVerify, + MCContext *&OutContext) { + // Add standard LLVM codegen passes. + // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some + // modifications for the PTX target. + + // Standard LLVM-Level Passes. + + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (OptLevel != CodeGenOpt::None) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + PM.add(createLowerInvokePass(getTargetLowering())); + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(PM, OptLevel); + + //PM.add(createPrintFunctionPass("\n\n" + // "*** Final LLVM Code input to ISel ***\n", + // &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Standard Lower-Level Passes. 
+ + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), + *getRegisterInfo(), + &getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, "After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + + if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass"); + + PM.add(createMachineLICMPass()); + PM.add(createMachineCSEPass()); + PM.add(createMachineSinkingPass()); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PreRegAlloc passes"); + + // Perform register allocation. + PM.add(createPTXRegisterAllocator()); + printAndVerify(PM, "After Register Allocation"); + + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + PM.add(createStackSlotColoringPass(false)); + + // FIXME: Post-RA LICM has asserts that fire on virtual registers. + // Run post-ra machine LICM to hoist reloads / remats. + //if (!DisablePostRAMachineLICM) + // PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); + } + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PostRegAlloc passes"); + + PM.add(createExpandPostRAPseudosPass()); + printAndVerify(PM, "After ExpandPostRAPseudos"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM, "After PrologEpilogCodeInserter"); + + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None) { + PM.add(createPostRAScheduler(OptLevel)); + printAndVerify(PM, "After PostRAScheduler"); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. 
+ if (OptLevel != CodeGenOpt::None) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printNoVerify(PM, "After BranchFolding"); + } + + // Tail duplication. + if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(false)); + printNoVerify(PM, "After TailDuplicate"); + } + + PM.add(createGCMachineCodeAnalysisPass()); + + //if (PrintGCInfo) + // PM.add(createGCInfoPrinter(dbgs())); + + if (OptLevel != CodeGenOpt::None) { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } + + if (addPreEmitPass(PM, OptLevel)) + printNoVerify(PM, "After PreEmit passes"); + PM.add(createPTXMFInfoExtract(*this, OptLevel)); + PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h index ae42153..5b7c82b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h @@ -17,6 +17,7 @@ #include "PTXISelLowering.h" #include "PTXInstrInfo.h" #include "PTXFrameLowering.h" +#include "PTXSelectionDAGInfo.h" #include "PTXSubtarget.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -25,15 +26,17 @@ namespace llvm { class PTXTargetMachine : public LLVMTargetMachine { private: - const TargetData DataLayout; - PTXSubtarget Subtarget; // has to be initialized before FrameLowering - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; - PTXTargetLowering TLInfo; + const TargetData DataLayout; + PTXSubtarget Subtarget; // has to be initialized before FrameLowering + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; + PTXSelectionDAGInfo TSInfo; + PTXTargetLowering TLInfo; public: - PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + PTXTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const PTXTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + + // We override this method to supply our own set of codegen passes. + virtual bool addPassesToEmitFile(PassManagerBase &, + formatted_raw_ostream &, + CodeGenFileType, + CodeGenOpt::Level, + bool = true); + + // Emission of machine code through JITCodeEmitter is not supported. + virtual bool addPassesToEmitMachineCode(PassManagerBase &, + JITCodeEmitter &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + // Emission of machine code through MCJIT is not supported. 
+ virtual bool addPassesToEmitMC(PassManagerBase &, + MCContext *&, + raw_ostream &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + private: + + bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, + bool DisableVerify, MCContext *&OutCtx); }; // class PTXTargetMachine class PTX32TargetMachine : public PTXTargetMachine { public: - PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: - PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX32TargetMachine } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt deleted file mode 100644 index 4b09cf5..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXInfo - PTXTargetInfo.cpp - ) - -add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile deleted file mode 100644 index 8619785..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp index 9df6c75..09a2735 100644 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -9,7 +9,7 @@ #include "PTX.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py deleted file mode 100755 index 1528690..0000000 --- a/contrib/llvm/lib/Target/PTX/generate-register-td.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python -##===- generate-register-td.py --------------------------------*-python-*--===## -## -## The LLVM Compiler Infrastructure -## -## This file is distributed under the University of Illinois Open Source -## License. See LICENSE.TXT for details. -## -##===----------------------------------------------------------------------===## -## -## This file describes the PTX register file generator. 
-## -##===----------------------------------------------------------------------===## - -from sys import argv, exit, stdout - - -if len(argv) != 5: - print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') - exit(1) - -try: - num_pred = int(argv[1]) - num_16bit = int(argv[2]) - num_32bit = int(argv[3]) - num_64bit = int(argv[4]) -except: - print('ERROR: Invalid integer parameter') - exit(1) - -## Print the register definition file -td_file = open('PTXRegisterInfo.td', 'w') - -td_file.write(''' -//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class PTXReg<string n> : Register<n> { - let Namespace = "PTX"; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -''') - - -# Print predicate registers -td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') -for r in range(0, num_pred): - td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) - -# Print 16-bit registers -td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_16bit): - td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) - -# Print 32-bit registers -td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_32bit): - td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) - -# Print 64-bit registers -td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_64bit): - td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) - - -td_file.write(''' -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -''') - - -# Print register classes - -td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) -td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) -td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) -td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) -td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) -td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) - - -td_file.close() - -## Now write the PTXCallingConv.td file -td_file = open('PTXCallingConv.td', 'w') - -# Reserve 10% of the available registers for return values, and the other 90% -# for parameters -num_ret_pred = int(0.1 * num_pred) -num_ret_16bit = int(0.1 * num_16bit) -num_ret_32bit = int(0.1 * num_32bit) -num_ret_64bit = int(0.1 * num_64bit) -num_param_pred = num_pred - num_ret_pred -num_param_16bit = num_16bit - num_ret_16bit 
-num_param_32bit = num_32bit - num_ret_32bit -num_param_64bit = num_64bit - num_ret_64bit - -param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] -ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] -param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] -ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] -param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] -ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] -param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] -ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] - -param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) -ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) -param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) -ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) -param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) -ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) -param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) -ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) - -td_file.write(''' -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. -// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; -''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, - ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) - - -td_file.close() diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt deleted file mode 100644 index 389ea77..0000000 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPowerPCAsmPrinter - PPCInstPrinter.cpp - ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile deleted file mode 100644 index f097e84..0000000 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMPowerPCAsmPrinter - -# Hack: we need to include 'main' powerpc target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 1a9bd76..b6a0835 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -13,7 +13,8 @@ #define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" -#include "PPCPredicates.h" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" @@ -30,7 +31,8 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -49,6 +51,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; + + printAnnotation(O, Annot); return; } } @@ -59,6 +63,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printOperand(MI, 0, O); O << ", "; printOperand(MI, 1, O); + printAnnotation(O, Annot); return; } @@ -72,11 +77,13 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; + printAnnotation(O, Annot); return; } } printInstruction(MI, O); + printAnnotation(O, Annot); } diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index d022a44..4ed4b76 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,7 @@ public: } virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; static const char *getInstructionName(unsigned Opcode); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index a1b8166..0000000 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMPowerPCDesc - PPCMCTargetDesc.cpp - PPCMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile deleted file mode 100644 index 9db6662..0000000 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PowerPC/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMPowerPCDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4b8cbb7..9f2fd6d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,17 +7,43 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "PPC.h" -#include "PPCFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + return Value; + case PPC::fixup_ppc_brcond14: + return Value & 0x3ffc; + case PPC::fixup_ppc_br24: + return Value & 0x3fffffc; +#if 0 + case PPC::fixup_ppc_hi16: + return (Value >> 16) & 0xffff; +#endif + case PPC::fixup_ppc_ha16: + return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; + case PPC::fixup_ppc_lo16: + return Value & 0xffff; + } +} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -31,10 +57,17 @@ public: MCValue Target, uint64_t &FixedValue) {} }; -class PPCAsmBackend : public TargetAsmBackend { +class PPCELFObjectWriter : public MCELFObjectTargetWriter { +public: + PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend, bool isLittleEndian) + : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {} +}; + +class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; public: - PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {} + PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } @@ -49,7 +82,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -109,15 +142,50 @@ namespace { return false; } }; + + class ELFPPCAsmBackend : public PPCAsmBackend { + Triple::OSType OSType; + public: + ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) : + PPCAsmBackend(T), OSType(OSType) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. 
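// Editorial sketch (not part of the patch): the loop below ORs the adjusted
// value into the four instruction bytes most-significant byte first, matching
// the big-endian object writer created further down. The ha16/lo16 split in
// adjustFixupValue() above can be sanity-checked with this hypothetical
// helper: lo16 is sign-extended by the consuming instruction, so ha16 rounds
// up whenever bit 15 of the value is set.
#include <cassert>
#include <cstdint>

static void checkHa16Lo16(uint32_t Value) {
  uint32_t Lo = Value & 0xffff;
  uint32_t Ha = ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
  assert((Ha << 16) + uint32_t(int16_t(Lo)) == Value);
}
// e.g. Value = 0x12348000 gives Lo = 0x8000 and Ha = 0x1235.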
+ for (unsigned i = 0; i != 4; ++i) + Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + bool is64 = getPointerSize() == 8; + return createELFObjectWriter(new PPCELFObjectWriter( + /*Is64Bit=*/is64, + OSType, + is64 ? ELF::EM_PPC64 : ELF::EM_PPC, + /*addend*/ true, /*isLittleEndian*/ false), + OS, /*IsLittleEndian=*/false); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + }; + } // end anonymous namespace -TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - return 0; + return new ELFPPCAsmBackend(T, Triple(TT).getOS()); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h new file mode 100644 index 0000000..369bbdc --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h @@ -0,0 +1,70 @@ +//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PPC target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCBASEINFO_H +#define PPCBASEINFO_H + +#include "PPCMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// getPPCRegisterNumbering - Given the enum value for some register, e.g. +/// PPC::F14, return the number that it corresponds to (e.g. 14). 
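// Editorial usage note (not part of the patch): the mapping defined below is
// purely mechanical. With this header included, a hypothetical sanity check
// would be:
#include <cassert>

static void checkPPCRegNumbering() {
  using namespace llvm;
  assert(getPPCRegisterNumbering(PPC::F14) == 14);  // the example cited above
  assert(getPPCRegisterNumbering(PPC::CR0EQ) == 2); // CR bits share 0-31 too
}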
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { + using namespace PPC; + switch (RegEnum) { + case 0: return 0; + case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; + case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; + case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; + case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; + case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; + case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; + case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; + case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; + case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; + case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; + case R10: case X10: case F10: case V10: case CR2EQ: return 10; + case R11: case X11: case F11: case V11: case CR2UN: return 11; + case R12: case X12: case F12: case V12: case CR3LT: return 12; + case R13: case X13: case F13: case V13: case CR3GT: return 13; + case R14: case X14: case F14: case V14: case CR3EQ: return 14; + case R15: case X15: case F15: case V15: case CR3UN: return 15; + case R16: case X16: case F16: case V16: case CR4LT: return 16; + case R17: case X17: case F17: case V17: case CR4GT: return 17; + case R18: case X18: case F18: case V18: case CR4EQ: return 18; + case R19: case X19: case F19: case V19: case CR4UN: return 19; + case R20: case X20: case F20: case V20: case CR5LT: return 20; + case R21: case X21: case F21: case V21: case CR5GT: return 21; + case R22: case X22: case F22: case V22: case CR5EQ: return 22; + case R23: case X23: case F23: case V23: case CR5UN: return 23; + case R24: case X24: case F24: case V24: case CR6LT: return 24; + case R25: case X25: case F25: case V25: case CR6GT: return 25; + case R26: case X26: case F26: case V26: case CR6EQ: return 26; + case R27: case X27: case F27: case V27: case CR6UN: return 27; + case R28: case X28: case F28: case V28: case CR7LT: return 28; + case R29: case X29: case F29: case V29: case CR7GT: return 29; + case R30: case X30: case F30: case V30: case CR7EQ: return 30; + case R31: case X31: case F31: case V31: case CR7UN: return 31; + default: + llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); + } +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index b3c889e..b3c889e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b6dca83..e9424d8 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -31,6 +31,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + if (is64Bit) + PointerSize = 8; + IsLittleEndian = false; + // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; @@ -56,7 +60,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - HasLCOMMDirective = true; + LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. 
} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index cf73d86..262f97c3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -12,9 +12,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "PPC.h" -#include "PPCRegisterInfo.h" -#include "PPCFixupKinds.h" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCInst.h" #include "llvm/ADT/Statistic.h" @@ -170,7 +169,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } @@ -182,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return getPPCRegisterNumbering(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 02b887f..d5c8a9e 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "PPCMCTargetDesc.h" #include "PPCMCAsmInfo.h" +#include "InstPrinter/PPCInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "PPCGenInstrInfo.inc" @@ -35,11 +39,16 @@ static MCInstrInfo *createPPCMCInstrInfo() { return X; } -extern "C" void LLVMInitializePowerPCMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); -} +static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = (TheTriple.getArch() == Triple::ppc64); + unsigned Flavour = isPPC64 ? 0 : 1; + unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR; + MCRegisterInfo *X = new MCRegisterInfo(); + InitPPCMCRegisterInfo(X, RA, Flavour, Flavour); + return X; +} static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { @@ -48,23 +57,95 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializePowerPCMCSubtargetInfo() { +static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = TheTriple.getArch() == Triple::ppc64; + + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new PPCMCAsmInfoDarwin(isPPC64); + else + MAI = new PPCLinuxMCAsmInfo(isPPC64); + + // Initial state of the frame pointer is R1. 
+ MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(PPC::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + if (RM == Reloc::Default) { + Triple T(TT); + if (T.isOSDarwin()) + RM = Reloc::DynamicNoPIC; + else + RM = Reloc::Static; + } + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + if (Triple(TT).isOSDarwin()) + return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createPPCMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + return new PPCInstPrinter(MAI, SyntaxVariant); +} + +extern "C" void LLVMInitializePowerPCTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo); + RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, createPPCMCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, createPPCMCSubtargetInfo); -} -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = TheTriple.getArch() == Triple::ppc64; - if (TheTriple.isOSDarwin()) - return new PPCMCAsmInfoDarwin(isPPC64); - return new PPCLinuxMCAsmInfo(isPPC64); + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); -} + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend); + TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); -extern "C" void LLVMInitializePowerPCMCAsmInfo() { - RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo); - RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index cee2350..e5bf2a9 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -15,6 +15,10 @@ #define PPCMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; class MCSubtargetInfo; class Target; class StringRef; @@ -22,6 +26,12 @@ class StringRef; extern Target ThePPC32Target; extern Target ThePPC64Target; +MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 12bb0a1..12bb0a1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index b2c8315..f872e86 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,8 +14,6 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H -#include "PPC.h" - namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. 
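// Editorial note (not part of the patch): the "(BI << 5) | BO" packing
// described above mirrors the PowerPC conditional-branch fields: BI selects a
// condition-register bit and BO says how to test it (12, i.e. 0b01100, means
// "branch if the bit is set"). For example, an equal-taken test on CR0 packs
// as:
enum {
  ExampleBI   = 2,                           // the EQ bit of CR0
  ExampleBO   = 12,                          // branch if condition true
  ExamplePred = (ExampleBI << 5) | ExampleBO // == 76, the EQ predicate value
};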
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 7191dd1..5dc1863 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H +#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> @@ -30,22 +31,12 @@ namespace llvm { class MachineInstr; class AsmPrinter; class MCInst; - class MCCodeEmitter; - class MCContext; - class MCInstrInfo; - class MCSubtargetInfo; class TargetMachine; - class TargetAsmBackend; FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); - MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &); - void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index aabf494..2d5d302 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -43,9 +43,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true", "Enable GPUL instructions">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", - "Enable the fsqrt instruction">; + "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", - "Enable the stfiwx instruction">; + "Enable the stfiwx instruction">; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 9de2200..9528459 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -18,9 +18,9 @@ #define DEBUG_TYPE "asmprinter" #include "PPC.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -43,11 +43,11 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" @@ -679,18 +679,8 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, return new PPCLinuxAsmPrinter(tm, Streamer); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - return new PPCInstPrinter(MAI, SyntaxVariant); -} - - // Force static initialization. 
extern "C" void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); - - TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index e161d23..475edf3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -19,7 +19,7 @@ #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" -#include "PPCPredicates.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp index 42232a0..4a1f182 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -140,7 +140,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, @@ -250,7 +250,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return getPPCRegisterNumbering(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 375e000..7dead10 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -109,14 +109,14 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first); + unsigned RegNo = getPPCRegisterNumbering(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } for (MachineRegisterInfo::liveout_iterator I = MF->getRegInfo().liveout_begin(), E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I); + unsigned RegNo = getPPCRegisterNumbering(*I); if (VRRegNo[RegNo] == *I) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } @@ -712,13 +712,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Initial state of the frame pointer is R1. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(PPC::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - static bool spillsCR(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); return FuncInfo->isCRSpilled(); @@ -885,7 +878,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8; + LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. If so, make sure it @@ -919,8 +912,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinReg = - std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR), - PPCRegisterInfo::getRegisterNumbering(MinG8R)); + std::min<unsigned>(getPPCRegisterNumbering(MinGPR), + getPPCRegisterNumbering(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 0c18de1..20faa71 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -40,7 +40,6 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 2176c02..6f204cc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "ppc-codegen" #include "PPC.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9741a39..d6b8a9e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14,8 +14,8 @@ #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. 
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -365,7 +366,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { setStackPointerRegisterToSaveRestore(PPC::X1); @@ -401,12 +406,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) @@ -463,7 +470,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -1368,8 +1375,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1378,7 +1390,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - const Type *IntPtrTy = + Type *IntPtrTy = DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( *DAG.getContext()); @@ -1398,16 +1410,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()), + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); - SDValue Ops[] = - { CallResult.first, CallResult.second }; - - return DAG.getMergeValues(Ops, 2, dl); + return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, @@ -2550,7 +2559,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { unsigned OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (!PPCSubTarget.getTargetTriple().isMacOSX() || + 
(PPCSubTarget.getTargetTriple().isMacOSX() && PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) { @@ -2574,7 +2583,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned char OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (!PPCSubTarget.getTargetTriple().isMacOSX() || + (PPCSubTarget.getTargetTriple().isMacOSX() && PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which @@ -2941,6 +2950,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; + bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; @@ -2985,6 +2995,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, } if (VA.isRegLoc()) { + seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { @@ -3011,9 +3022,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call. + // Set CR6 to true if this is a vararg call with floating args passed in + // registers. if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); + SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, + dl, MVT::i32), 0); RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); } @@ -3403,6 +3416,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, Ins, InVals); } +bool +PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_PPC); +} + SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -4490,7 +4514,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, PPCSubTarget); @@ -5504,7 +5529,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: @@ -5634,7 +5659,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. 
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { // FIXME: PPC does not allow r+i addressing modes for vectors! // PPC allows a sign-extended 16-bit immediate field. @@ -5670,7 +5695,7 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. -bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ +bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ // PPC allows a sign-extended 16-bit immediate field. return (V > -(1 << 16) && V < (1 << 16)-1); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 986b4e7..430e45e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -246,7 +246,7 @@ namespace llvm { virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address @@ -323,7 +323,7 @@ namespace llvm { /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(const Type *Ty) const; + unsigned getByValTypeAlignment(Type *Ty) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. @@ -334,12 +334,12 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. - virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; /// isLegalAddressImmediate - Return true if the GlobalValue can be used as /// the offset of the target addressing mode. 
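As a usage note for the signature change in the hunk just above (const Type * becoming Type * on the addressing-mode hooks): the sketch below shows roughly how an optimization pass queries isLegalAddressingMode after this change. It is an assumed caller-side illustration, not code from the diff; the helper name offsetIsLegalOnPPC is invented, and the legal displacement range is whatever the PPC implementation in PPCISelLowering.cpp accepts (its comment describes a sign-extended 16-bit immediate field).

// Hypothetical caller-side sketch (assumed API), not code from the diff.
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool offsetIsLegalOnPPC(const TargetLowering &TLI, Type *AccessTy, int64_t Offs) {
  TargetLowering::AddrMode AM;  // default-constructed: no base GV, no scaled index
  AM.HasBaseReg = true;         // ask about a reg+imm addressing form
  AM.BaseOffs = Offs;           // candidate displacement
  // Note the second parameter is now a non-const Type*, matching the header change above.
  return TLI.isLegalAddressingMode(AM, AccessTy);
}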
@@ -390,7 +390,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, @@ -444,6 +445,12 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + virtual bool + CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 143444f..2bc109c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -15,18 +15,18 @@ #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCHazardRecognizers.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/STLExtras.h" @@ -334,7 +334,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL; - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(SrcReg, @@ -350,7 +350,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { if (SrcReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) .addReg(SrcReg, @@ -366,17 +366,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (RC == PPC::F4RCRegisterClass) { + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { // FIXME (64-bit): Enable @@ -402,7 +402,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // If the saved register wasn't CR0, shift the bits left so that they are 
// in CR0's slot. if (SrcReg != PPC::CR0) { - unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; + unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; // rlwinm scratch, scratch, ShiftBits, 0, 31. NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(ShiftBits) @@ -414,7 +414,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::CRBITRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should // not cause any bug. If we need other uses of CR bits, the following @@ -448,7 +448,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, PPC::CRRCRegisterClass, NewMIs); - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // STVX VAL, 0, R0 @@ -499,7 +499,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs)const{ - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), DestReg), FrameIdx)); @@ -508,7 +508,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::R11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { if (DestReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), FrameIdx)); @@ -517,13 +517,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::R11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); - } else if (RC == PPC::F4RCRegisterClass) { + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); - } else if (RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { // FIXME: We need a scatch reg here. The trouble with using R0 is that // it's possible for the stack frame to be so big the save location is // out of range of immediate offsets, necessitating another register. @@ -537,7 +537,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { - unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. 
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) @@ -546,7 +546,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) .addReg(ScratchReg)); - } else if (RC == PPC::CRBITRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { unsigned Reg = 0; if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || @@ -577,7 +577,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, PPC::CRRCRegisterClass, NewMIs); - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // Dest = LVX 0, R0 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 773578c..f248b5b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1053,6 +1053,10 @@ def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; +def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), + "crxor $dst, $dst, $dst", BrCR, + []>; + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1472,5 +1476,7 @@ def : Pat<(membarrier (i32 imm /*ll*/), (i32 imm /*device*/)), (SYNC)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; + include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9c2428b..2e90b7a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" @@ -68,52 +67,12 @@ PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { (EnablePPC64RS && Subtarget.isPPC64())); } -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). 
-unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { - using namespace PPC; - switch (RegEnum) { - case 0: return 0; - case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; - case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; - case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; - case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; - case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; - case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; - case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; - case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; - case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; - case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; - case R10: case X10: case F10: case V10: case CR2EQ: return 10; - case R11: case X11: case F11: case V11: case CR2UN: return 11; - case R12: case X12: case F12: case V12: case CR3LT: return 12; - case R13: case X13: case F13: case V13: case CR3GT: return 13; - case R14: case X14: case F14: case V14: case CR3EQ: return 14; - case R15: case X15: case F15: case V15: case CR3UN: return 15; - case R16: case X16: case F16: case V16: case CR4LT: return 16; - case R17: case X17: case F17: case V17: case CR4GT: return 17; - case R18: case X18: case F18: case V18: case CR4EQ: return 18; - case R19: case X19: case F19: case V19: case CR4UN: return 19; - case R20: case X20: case F20: case V20: case CR5LT: return 20; - case R21: case X21: case F21: case V21: case CR5GT: return 21; - case R22: case X22: case F22: case V22: case CR5EQ: return 22; - case R23: case X23: case F23: case V23: case CR5UN: return 23; - case R24: case X24: case F24: case V24: case CR6LT: return 24; - case R25: case X25: case F25: case V25: case CR6GT: return 25; - case R26: case X26: case F26: case V26: case CR6EQ: return 26; - case R27: case X27: case F27: case V27: case CR6UN: return 27; - case R28: case X28: case F28: case V28: case CR7LT: return 28; - case R29: case X29: case F29: case V29: case CR7GT: return 29; - case R30: case X30: case F30: case V30: case CR7EQ: return 30; - case R31: case X31: case F31: case V31: case CR7UN: return 31; - default: - llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); - } -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) - : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) { + : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, + ST.isPPC64() ? 0 : 1, + ST.isPPC64() ? 0 : 1), + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -519,7 +478,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) .addReg(Reg, RegState::Kill) - .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4) + .addImm(getPPCRegisterNumbering(SrcReg) * 4) .addImm(0) .addImm(31); @@ -668,10 +627,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); } -unsigned PPCRegisterInfo::getRARegister() const { - return !Subtarget.isPPC64() ? 
PPC::LR : PPC::LR8; -} - unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -688,27 +643,3 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const { unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } - -/// DWARFFlavour - Flavour of dwarf regnumbers -/// -namespace DWARFFlavour { - enum { - PPC64 = 0, PPC32 = 1 - }; -} - -int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - unsigned Flavour = Subtarget.isPPC64() ? - DWARFFlavour::PPC64 : DWARFFlavour::PPC32; - - return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour); -} - -int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - unsigned Flavour = Subtarget.isPPC64() ? - DWARFFlavour::PPC64 : DWARFFlavour::PPC32; - - return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour); -} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 33fe5eb..1cc7213 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -33,10 +33,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// PPC::F14, return the number that it corresponds to (e.g. 14). - static unsigned getRegisterNumbering(unsigned RegEnum); - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; @@ -62,15 +58,11 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 5ea9b0f..cf194de 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -15,7 +15,7 @@ #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include <cstdlib> #define GET_SUBTARGETINFO_TARGET_DESC diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index e0ea5ad..f5744b8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -16,76 +16,43 @@ #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; -// This is duplicated code. Refactor this. 
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool NoExecStack) { - if (Triple(TT).isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - - return NULL; -} - extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); - - // Register the MC Code Emitter - TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); - - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend); - TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer); - TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer); } - -PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), +PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - - if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isDarwin()) - setRelocationModel(Reloc::DynamicNoPIC); - else - setRelocationModel(Reloc::Static); - } } /// Override this for PowerPC. Tail merging happily breaks up instruction issue /// groups, which typically degrades performance. bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } -PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : PPCTargetMachine(T, TT, CPU, FS, false) { +PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { } -PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : PPCTargetMachine(T, TT, CPU, FS, true) { +PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { } @@ -110,19 +77,11 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); + if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. 
DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index baf07e3..d06f084 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -40,9 +40,9 @@ class PPCTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: - PPCTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool is64Bit); + PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -77,16 +77,18 @@ public: /// class PPC32TargetMachine : public PPCTargetMachine { public: - PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. /// class PPC64TargetMachine : public PPCTargetMachine { public: - PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index ad607d0..5dc8568 100644 --- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -9,7 +9,7 @@ #include "PPC.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::ThePPC32Target, llvm::ThePPC64Target; diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 1e8c029..0000000 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMSparcDesc - SparcMCTargetDesc.cpp - SparcMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile deleted file mode 100644 index abcbe2d..0000000 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMSparcDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index cb92a2b..cb2a7df 100644 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "SparcMCTargetDesc.h" #include "SparcMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SparcGenInstrInfo.inc" @@ -35,8 +36,10 @@ static MCInstrInfo *createSparcMCInstrInfo() { return X; } -extern "C" void LLVMInitializeSparcMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); +static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSparcMCRegisterInfo(X, SP::I7); + return X; } static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,12 +49,31 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeSparcMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, - createSparcMCSubtargetInfo); +static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeSparcMCAsmInfo() { +extern "C" void LLVMInitializeSparcTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, + createSparcMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, + createSparcMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, + createSparcMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index edde842..345e1bc 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -22,9 +22,9 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 6f30d3f..d70b163 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -631,8 +631,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const assert(CalleeFn->hasStructRetAttr() && "Callee does not have the StructRet attribute."); - const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); + Type *ElementTy = Ty->getElementType(); return getTargetData()->getTypeAllocSize(ElementTy); } @@ -748,8 +748,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - // SPARC has no intrinsics for these particular operations. + // FIXME: There are instructions available for ATOMIC_FENCE + // on SparcV8 and later. setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 4e3ddf8..7a6bf50 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -17,8 +17,8 @@ #include "SparcSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index 0acdd2c..8c16251 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Type.h" @@ -31,7 +30,7 @@ using namespace llvm; SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii) - : SparcGenRegisterInfo(), Subtarget(st), TII(tii) { + : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) { } const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -113,10 +112,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, void 
SparcRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} -unsigned SparcRegisterInfo::getRARegister() const { - return SP::I7; -} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } @@ -130,11 +125,3 @@ unsigned SparcRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h index ec9e63a..f845667 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h @@ -46,15 +46,11 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp index de647e8..6c501cf 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -13,7 +13,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index cbe6d87..3d7b4a4 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -13,7 +13,7 @@ #include "Sparc.h" #include "SparcTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeSparcTarget() { @@ -24,10 +24,11 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine ctor - Create an ILP32 architecture model /// -SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS), +SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), @@ -51,15 +52,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, } SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : SparcTargetMachine(T, TT, CPU, FS, false) { + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) { } 
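One design note on the constructors just above, sketched as an assumed caller: because the relocation and code models are now constructor parameters (with the defaulting moved into MCCodeGenInfo hooks such as createSparcMCCodeGenInfo), a tool chooses them when it creates the TargetMachine rather than calling a setter afterwards. The triple string and the helper name createSparcTM are illustrative, and the createTargetMachine signature is assumed to match the constructor type registered at this revision.

// Hypothetical tool-side sketch (assumed API), not code from the diff.
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
using namespace llvm;

static TargetMachine *createSparcTM() {
  std::string Err;
  const std::string TT = "sparc-unknown-linux-gnu";  // illustrative triple
  const Target *T = TargetRegistry::lookupTarget(TT, Err);
  if (!T) return 0;
  // Passing Reloc::Default lets createSparcMCCodeGenInfo pick the backend default.
  return T->createTargetMachine(TT, /*CPU=*/"", /*Features=*/"",
                                Reloc::Default, CodeModel::Default);
}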
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : SparcTargetMachine(T, TT, CPU, FS, true) { + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) { } diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h index 799fc49..3c907dd 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h @@ -33,9 +33,9 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcInstrInfo InstrInfo; SparcFrameLowering FrameLowering; public: - SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool is64bit); + SparcTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -62,16 +62,18 @@ public: /// class SparcV8TargetMachine : public SparcTargetMachine { public: - SparcV8TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SparcV8TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; /// SparcV9TargetMachine - Sparc 64-bit target machine /// class SparcV9TargetMachine : public SparcTargetMachine { public: - SparcV9TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SparcV9TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 5c06f07..c9d5b7b 100644 --- a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Sparc.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheSparcTarget; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 2ac9016..0000000 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMSystemZDesc - SystemZMCTargetDesc.cpp - SystemZMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile deleted file mode 100644 index 08f1a9d..0000000 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMSystemZDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 5a826a6..23fb1e0 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "SystemZMCTargetDesc.h" #include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SystemZGenInstrInfo.inc" @@ -35,9 +36,10 @@ static MCInstrInfo *createSystemZMCInstrInfo() { return X; } -extern "C" void LLVMInitializeSystemZMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, - createSystemZMCInstrInfo); +static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, 0); + return X; } static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, @@ -48,11 +50,32 @@ static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, return X; } -extern "C" void LLVMInitializeSystemZMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, - createSystemZMCSubtargetInfo); +static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeSystemZMCAsmInfo() { +extern "C" void LLVMInitializeSystemZTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, + createSystemZMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, + createSystemZMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index fd4d8b7..43dcdfc 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -28,10 +28,8 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 871c297..48ca99f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,6 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 99e2730..5f3dd80 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "SystemZGenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 11a39fc..580d65b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -478,7 +478,7 @@ def MOV64rmm : RSYI<0x04EB, "lmg\t{$from, $to, $dst}", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, +let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1, Constraints = "$src = $dst" in { def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), "lhi\t${dst:subreg_even}, 0", diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 59692e8..b1050d4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -33,7 +33,7 @@ using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(), TM(tm), TII(tii) { + : SystemZGenRegisterInfo(0), TM(tm), TII(tii) { } const unsigned* @@ -126,11 +126,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } -unsigned SystemZRegisterInfo::getRARegister() const { - assert(0 && "What is the return address register"); - return 0; -} - unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { assert(0 && "What is the frame register"); @@ -146,13 +141,3 @@ 
unsigned SystemZRegisterInfo::getEHHandlerRegister() const { assert(0 && "What is the exception handler register"); return 0; } - -int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "What is the dwarf register number"); - return -1; -} - -int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - assert(0 && "What is the dwarf register number"); - return -1; -} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 2e262e1..03935b2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -48,15 +48,11 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index b3ed066..0845510 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -15,7 +15,7 @@ #include "SystemZ.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 48298cc..e390f06 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -10,7 +10,7 @@ #include "SystemZTargetMachine.h" #include "SystemZ.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeSystemZTarget() { @@ -21,18 +21,15 @@ extern "C" void LLVMInitializeSystemZTarget() { /// SystemZTargetMachine ctor - Create an ILP64 architecture model /// SystemZTargetMachine::SystemZTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { - - if (getRelocationModel() == Reloc::Default) - setRelocationModel(Reloc::Static); } bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index e40b556..43dce4b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -37,8 +37,9 @@ class SystemZTargetMachine : public LLVMTargetMachine { SystemZSelectionDAGInfo TSInfo; SystemZFrameLowering FrameLowering; public: - SystemZTargetMachine(const Target &T, 
const std::string &TT, - const std::string &CPU, const std::string &FS); + SystemZTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp index 8272b11..da99282 100644 --- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -9,7 +9,7 @@ #include "SystemZ.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheSystemZTarget; diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index a42ce54..a2b83bc 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -17,6 +17,7 @@ #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include <cstring> @@ -39,6 +40,11 @@ void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) { unwrap(PM)->add(new TargetData(*unwrap(TD))); } +void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI, + LLVMPassManagerRef PM) { + unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI))); +} + char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) { std::string StringRep = unwrap(TD)->getStringRepresentation(); return strdup(StringRep.c_str()); @@ -87,13 +93,13 @@ unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD, unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy, unsigned long long Offset) { - const StructType *STy = unwrap<StructType>(StructTy); + StructType *STy = unwrap<StructType>(StructTy); return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset); } unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy, unsigned Element) { - const StructType *STy = unwrap<StructType>(StructTy); + StructType *STy = unwrap<StructType>(StructTy); return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element); } diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp deleted file mode 100644 index a97b0e8..0000000 --- a/contrib/llvm/lib/Target/TargetAsmInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -using namespace llvm; - -TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) { - TLOF = &TM.getTargetLowering()->getObjFileLowering(); - TFI = TM.getFrameLowering(); - TRI = TM.getRegisterInfo(); - TFI->getInitialFrameState(InitialFrameState); -} diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp index 17d022a..bd6a6b6 100644 --- a/contrib/llvm/lib/Target/TargetData.cpp +++ b/contrib/llvm/lib/Target/TargetData.cpp @@ -41,7 +41,7 @@ char TargetData::ID = 0; // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { +StructLayout::StructLayout(StructType *ST, const TargetData &TD) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; @@ -49,7 +49,7 @@ StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { - const Type *Ty = ST->getElementType(i); + Type *Ty = ST->getElementType(i); unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. @@ -139,6 +139,7 @@ void TargetData::init(StringRef Desc) { PointerMemSize = 8; PointerABIAlign = 8; PointerPrefAlign = PointerABIAlign; + StackNaturalAlign = 0; // Default alignments setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1 @@ -218,7 +219,12 @@ void TargetData::init(StringRef Desc) { Token = Split.second; } while (!Specifier.empty() || !Token.empty()); break; - + case 'S': // Stack natural alignment. + StackNaturalAlign = getInt(Specifier.substr(1)); + StackNaturalAlign /= 8; + // FIXME: Should we really be truncating these alingments and + // sizes silently? + break; default: break; } @@ -261,7 +267,7 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, /// preferred if ABIInfo = false) the target wants for the specified datatype. unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, bool ABIInfo, - const Type *Ty) const { + Type *Ty) const { // Check to see if we have an exact match and remember the best match we see. int BestMatchIdx = -1; int LargestInt = -1; @@ -315,7 +321,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, namespace { class StructLayoutMap { - typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; + typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy; LayoutInfoTy LayoutInfo; public: @@ -329,7 +335,7 @@ public: } } - StructLayout *&operator[](const StructType *STy) { + StructLayout *&operator[](StructType *STy) { return LayoutInfo[STy]; } @@ -343,7 +349,7 @@ TargetData::~TargetData() { delete static_cast<StructLayoutMap*>(LayoutMap); } -const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { +const StructLayout *TargetData::getStructLayout(StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); @@ -372,7 +378,9 @@ std::string TargetData::getStringRepresentation() const { OS << (LittleEndian ? 
"e" : "E") << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 - << ':' << PointerPrefAlign*8; + << ':' << PointerPrefAlign*8 + << "-S" << StackNaturalAlign*8; + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { const TargetAlignElem &AI = Alignments[i]; OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' @@ -389,14 +397,14 @@ std::string TargetData::getStringRepresentation() const { } -uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { +uint64_t TargetData::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { case Type::LabelTyID: case Type::PointerTyID: return getPointerSizeInBits(); case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); } case Type::StructTyID: @@ -435,7 +443,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. */ -unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { +unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const { int AlignType = -1; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -485,7 +493,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { abi_or_pref, Ty); } -unsigned TargetData::getABITypeAlignment(const Type *Ty) const { +unsigned TargetData::getABITypeAlignment(Type *Ty) const { return getAlignment(Ty, true); } @@ -496,7 +504,7 @@ unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { } -unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { +unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) return Alignments[i].ABIAlign; @@ -504,11 +512,11 @@ unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { return getABITypeAlignment(Ty); } -unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const { +unsigned TargetData::getPrefTypeAlignment(Type *Ty) const { return getAlignment(Ty, false); } -unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { +unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const { unsigned Align = getPrefTypeAlignment(Ty); assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); return Log2_32(Align); @@ -521,16 +529,17 @@ IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { } -uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, - unsigned NumIndices) const { - const Type *Ty = ptrTy; +uint64_t TargetData::getIndexedOffset(Type *ptrTy, + ArrayRef<Value *> Indices) const { + Type *Ty = ptrTy; assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); uint64_t Result = 0; generic_gep_type_iterator<Value* const*> - TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices); - for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) { - if (const StructType *STy = dyn_cast<StructType>(*TI)) { + TI = gep_type_begin(ptrTy, Indices); + for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX; + ++CurIDX, ++TI) { + if (StructType *STy = dyn_cast<StructType>(*TI)) { assert(Indices[CurIDX]->getType() == Type::getInt32Ty(ptrTy->getContext()) && "Illegal struct idx"); @@ -561,7 +570,7 @@ uint64_t 
TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, /// global. This includes an explicitly requested alignment (if the global /// has one). unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const { - const Type *ElemType = GV->getType()->getElementType(); + Type *ElemType = GV->getType()->getElementType(); unsigned Alignment = getPrefTypeAlignment(ElemType); unsigned GVAlignment = GV->getAlignment(); if (GVAlignment >= Alignment) { diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp index 19fd581..122f869 100644 --- a/contrib/llvm/lib/Target/TargetFrameLowering.cpp +++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp @@ -23,14 +23,6 @@ using namespace llvm; TargetFrameLowering::~TargetFrameLowering() { } -/// getInitialFrameState - Returns a list of machine moves that are assumed -/// on entry to a function. -void -TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Default is to do nothing. -} - /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp index 703431b..56b7b69 100644 --- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -35,34 +35,16 @@ using namespace llvm; // Generic Code //===----------------------------------------------------------------------===// -TargetLoweringObjectFile::TargetLoweringObjectFile() : - Ctx(0), - TextSection(0), - DataSection(0), - BSSSection(0), - ReadOnlySection(0), - StaticCtorSection(0), - StaticDtorSection(0), - LSDASection(0), - CompactUnwindSection(0), - DwarfAbbrevSection(0), - DwarfInfoSection(0), - DwarfLineSection(0), - DwarfFrameSection(0), - DwarfPubNamesSection(0), - DwarfPubTypesSection(0), - DwarfDebugInlineSection(0), - DwarfStrSection(0), - DwarfLocSection(0), - DwarfARangesSection(0), - DwarfRangesSection(0), - DwarfMacroInfoSection(0), - TLSExtraDataSection(0), - CommDirectiveSupportsAlignment(true), - SupportsWeakOmittedEHFrame(true), - IsFunctionEHFrameSymbolPrivate(true) { +/// Initialize - this method must be called before any actual lowering is +/// done. This specifies the current context for codegen, and gives the +/// lowering implementations a chance to set up their default sections. +void TargetLoweringObjectFile::Initialize(MCContext &ctx, + const TargetMachine &TM) { + Ctx = &ctx; + InitMCObjectFileInfo(TM.getTargetTriple(), + TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } - + TargetLoweringObjectFile::~TargetLoweringObjectFile() { } @@ -93,7 +75,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV) { /// known to have a type that is an array of 1/2/4 byte elements) ends with a /// nul value and contains no other nuls in it. static bool IsNullTerminatedString(const Constant *C) { - const ArrayType *ATy = cast<ArrayType>(C->getType()); + ArrayType *ATy = cast<ArrayType>(C->getType()); // First check: is we have constant array of i8 terminated with zero if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) { @@ -188,8 +170,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. 
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { - if (const IntegerType *ITy = + if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(ATy->getElementType())) { if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 || ITy->getBitWidth() == 32) && @@ -341,20 +323,3 @@ getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, } } } - -unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getLSDAEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getTTypeEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp index 74a1f4e..fe8a7ce 100644 --- a/contrib/llvm/lib/Target/TargetMachine.cpp +++ b/contrib/llvm/lib/Target/TargetMachine.cpp @@ -40,8 +40,6 @@ namespace llvm { bool JITExceptionHandling; bool JITEmitDebugInfo; bool JITEmitDebugInfoToDisk; - Reloc::Model RelocationModel; - CodeModel::Model CMModel; bool GuaranteedTailCallOpt; unsigned StackAlignmentOverride; bool RealignStack; @@ -49,6 +47,7 @@ namespace llvm { bool StrongPHIElim; bool HasDivModLibcall; bool AsmVerbosityDefault(false); + bool EnableSegmentedStacks; } static cl::opt<bool, true> @@ -143,38 +142,6 @@ EmitJitDebugInfoToDisk("jit-emit-debug-to-disk", cl::location(JITEmitDebugInfoToDisk), cl::init(false)); -static cl::opt<llvm::Reloc::Model, true> -DefRelocationModel("relocation-model", - cl::desc("Choose relocation model"), - cl::location(RelocationModel), - cl::init(Reloc::Default), - cl::values( - clEnumValN(Reloc::Default, "default", - "Target default relocation model"), - clEnumValN(Reloc::Static, "static", - "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValEnd)); -static cl::opt<llvm::CodeModel::Model, true> -DefCodeModel("code-model", - cl::desc("Choose code model"), - cl::location(CMModel), - cl::init(CodeModel::Default), - cl::values( - clEnumValN(CodeModel::Default, "default", - "Target default code model"), - clEnumValN(CodeModel::Small, "small", - "Small code model"), - clEnumValN(CodeModel::Kernel, "kernel", - "Kernel code model"), - clEnumValN(CodeModel::Medium, "medium", - "Medium code model"), - clEnumValN(CodeModel::Large, "large", - "Large code model"), - clEnumValEnd)); static cl::opt<bool, true> EnableGuaranteedTailCallOpt("tailcallopt", cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), @@ -212,13 +179,20 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +static cl::opt<bool, true> +SegmentedStacks("segmented-stacks", + cl::desc("Use segmented stacks if possible."), + cl::location(EnableSegmentedStacks), + cl::init(false)); + //--------------------------------------------------------------------------- // TargetMachine Class // TargetMachine::TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS) - : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0), + : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), + CodeGenInfo(0), AsmInfo(0), MCRelaxAll(false), 
MCNoExecStack(false), MCSaveTempLabels(false), @@ -231,29 +205,24 @@ TargetMachine::TargetMachine(const Target &T, } TargetMachine::~TargetMachine() { + delete CodeGenInfo; delete AsmInfo; } /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. -Reloc::Model TargetMachine::getRelocationModel() { - return RelocationModel; -} - -/// setRelocationModel - Sets the code generation relocation model. -void TargetMachine::setRelocationModel(Reloc::Model Model) { - RelocationModel = Model; +Reloc::Model TargetMachine::getRelocationModel() const { + if (!CodeGenInfo) + return Reloc::Default; + return CodeGenInfo->getRelocationModel(); } /// getCodeModel - Returns the code model. The choices are small, kernel, /// medium, large, and target default. -CodeModel::Model TargetMachine::getCodeModel() { - return CMModel; -} - -/// setCodeModel - Sets the code model. -void TargetMachine::setCodeModel(CodeModel::Model Model) { - CMModel = Model; +CodeModel::Model TargetMachine::getCodeModel() const { + if (!CodeGenInfo) + return CodeModel::Default; + return CodeGenInfo->getCodeModel(); } bool TargetMachine::getAsmVerbosityDefault() { diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp index 90a8f8d..67239b8 100644 --- a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp @@ -98,44 +98,25 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, } const TargetRegisterClass * -llvm::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) { - // First take care of the trivial cases +TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B) const { + // First take care of the trivial cases. if (A == B) return A; if (!A || !B) return 0; - // If B is a subclass of A, it will be handled in the loop below - if (B->hasSubClass(A)) - return A; + // Register classes are ordered topologically, so the largest common + // sub-class it the common sub-class with the smallest ID. + const unsigned *SubA = A->getSubClassMask(); + const unsigned *SubB = B->getSubClassMask(); - const TargetRegisterClass *Best = 0; - for (TargetRegisterClass::sc_iterator I = A->subclasses_begin(); - const TargetRegisterClass *X = *I; ++I) { - if (X == B) - return B; // B is a subclass of A - - // X must be a common subclass of A and B - if (!B->hasSubClass(X)) - continue; - - // A superclass is definitely better. - if (!Best || Best->hasSuperClass(X)) { - Best = X; - continue; - } - - // A subclass is definitely worse - if (Best->hasSubClass(X)) - continue; - - // Best and *I have no super/sub class relation - pick the larger class, or - // the smaller spill size. - int nb = std::distance(Best->begin(), Best->end()); - int ni = std::distance(X->begin(), X->end()); - if (ni>nb || (ni==nb && X->getSize() < Best->getSize())) - Best = X; - } - return Best; + // We could start the search from max(A.ID, B.ID), but we are only going to + // execute 2-3 iterations anyway. + for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32) + if (unsigned Common = *SubA++ & *SubB++) + return getRegClass(Base + CountTrailingZeros_32(Common)); + + // No common sub-class exists. 
+ return NULL; } diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp index ec73087..1eaccff 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -7,20 +7,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetRegistry.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "X86.h" +#include "llvm/MC/MCTargetAsmLexer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; namespace { -class X86AsmLexer : public TargetAsmLexer { +class X86AsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; bool tentativeIsValid; @@ -60,8 +60,8 @@ protected: } } public: - X86AsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { + X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) + : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { } }; @@ -160,6 +160,6 @@ AsmToken X86AsmLexer::LexTokenIntel() { } extern "C" void LLVMInitializeX86AsmLexer() { - RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target); - RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); + RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target); + RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target); } diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index d45dd35..cb4f15f 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmParser.h" -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" @@ -26,6 +24,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -33,7 +32,7 @@ using namespace llvm; namespace { struct X86Operand; -class X86ATTAsmParser : public TargetAsmParser { +class X86ATTAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; @@ -48,6 +47,7 @@ private: X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveCode(StringRef IDVal, SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -65,6 +65,10 @@ private: // FIXME: Can tablegen auto-generate this? 
return (STI.getFeatureBits() & X86::Mode64Bit) != 0; } + void SwitchMode() { + unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); + setAvailableFeatures(FB); + } /// @name Auto-generated Matcher Functions /// { @@ -76,7 +80,7 @@ private: public: X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : TargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -223,6 +227,21 @@ struct X86Operand : public MCParsedAsmOperand { (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } + bool isImmZExtu32u8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (Value <= 0x00000000000000FFULL); + } bool isImmSExti64i8() const { if (!isImm()) return false; @@ -382,19 +401,25 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, if (Tok.isNot(AsmToken::Identifier)) return Error(Tok.getLoc(), "invalid register name"); - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); // If the match failed, try the register name as lowercase. if (RegNo == 0) RegNo = MatchRegisterName(LowercaseString(Tok.getString())); - // FIXME: This should be done using Requires<In32BitMode> and - // Requires<In64BitMode> so "eiz" usage in 64-bit instructions - // can be also checked. - if (RegNo == X86::RIZ && !is64BitMode()) - return Error(Tok.getLoc(), "riz register in 64-bit mode only"); + if (!is64BitMode()) { + // FIXME: This should be done using Requires<In32BitMode> and + // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also + // checked. + // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a + // REX prefix. + if (RegNo == X86::RIZ || + X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || + X86II::isX86_64NonExtLowByteReg(RegNo) || + X86II::isX86_64ExtendedReg(RegNo)) + return Error(Tok.getLoc(), "register %" + + Tok.getString() + " is only available in 64-bit mode"); + } // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { @@ -472,7 +497,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() { SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; if (RegNo == X86::EIZ || RegNo == X86::RIZ) { - Error(Start, "eiz and riz can only be used as index registers"); + Error(Start, "%eiz and %riz can only be used as index registers"); return 0; } @@ -956,6 +981,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, // First, try a direct match. switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + default: break; case Match_Success: Out.EmitInstruction(Inst); return false; @@ -994,7 +1020,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Check for the various suffix matches. 
Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; - MatchResultTy Match1, Match2, Match3, Match4; + unsigned Match1, Match2, Match3, Match4; Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); Tmp[Base.size()] = Suffixes[1]; @@ -1096,6 +1122,8 @@ bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal.startswith(".code")) + return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); return true; } @@ -1124,15 +1152,35 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// ParseDirectiveCode +/// ::= .code32 | .code64 +bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { + if (IDVal == ".code32") { + Parser.Lex(); + if (is64BitMode()) { + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); + } + } else if (IDVal == ".code64") { + Parser.Lex(); + if (!is64BitMode()) { + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); + } + } else { + return Error(L, "unexpected directive " + IDVal); + } + return false; +} extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); + RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4a0d2ec..3aacb20 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -21,13 +21,16 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_REGINFO_ENUM #include "X86GenRegisterInfo.inc" +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; @@ -64,8 +67,8 @@ extern Target TheX86_32Target, TheX86_64Target; static bool translateInstruction(MCInst &target, InternalInstruction &source); -X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : - MCDisassembler(), +X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : + MCDisassembler(STI), fMode(mode) { } @@ -106,28 +109,34 @@ static void logger(void* arg, const char* log) { // Public interface for the disassembler // -bool X86GenericDisassembler::getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const { +MCDisassembler::DecodeStatus +X86GenericDisassembler::getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const { InternalInstruction internalInstr; + + dlog_t loggerFn = logger; + if (&vStream == &nulls()) + loggerFn = 0; // Disable logging completely if it's going to nulls(). 
int ret = decodeInstruction(&internalInstr, regionReader, (void*)®ion, - logger, + loggerFn, (void*)&vStream, address, fMode); if (ret) { size = internalInstr.readerCursor - address; - return false; + return Fail; } else { size = internalInstr.length; - return !translateInstruction(instr, internalInstr); + return (!translateInstruction(instr, internalInstr)) ? Success : Fail; } } @@ -183,8 +192,46 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } } + // By default sign-extend all X86 immediates based on their encoding. + else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || + type == TYPE_IMM64) { + uint32_t Opcode = mcInst.getOpcode(); + switch (operand.encoding) { + default: + break; + case ENCODING_IB: + // Special case those X86 instructions that use the imm8 as a set of + // bits, bit count, etc. and are not sign-extend. + if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && + Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && + Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && + Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && + Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && + Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && + Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && + Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && + Opcode != X86::VINSERTPSrr) + type = TYPE_MOFFS8; + break; + case ENCODING_IW: + type = TYPE_MOFFS16; + break; + case ENCODING_ID: + type = TYPE_MOFFS32; + break; + case ENCODING_IO: + type = TYPE_MOFFS64; + break; + } + } switch (type) { + case TYPE_XMM128: + mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); + return; + case TYPE_XMM256: + mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); + return; case TYPE_MOFFS8: case TYPE_REL8: if(immediate & 0x80) @@ -543,12 +590,12 @@ static bool translateInstruction(MCInst &mcInst, return false; } -static MCDisassembler *createX86_32Disassembler(const Target &T) { - return new X86Disassembler::X86_32Disassembler; +static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_32Disassembler(STI); } -static MCDisassembler *createX86_64Disassembler(const Target &T) { - return new X86Disassembler::X86_64Disassembler; +static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_64Disassembler(STI); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h index 550cf9d..6ac9a0f 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h @@ -92,6 +92,7 @@ struct InternalInstruction; namespace llvm { class MCInst; +class MCSubtargetInfo; class MemoryObject; class raw_ostream; @@ -107,16 +108,17 @@ protected: /// Constructor - Initializes the disassembler. /// /// @param mode - The X86 architecture mode to decode for. - X86GenericDisassembler(DisassemblerMode mode); + X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode); public: ~X86GenericDisassembler(); /// getInstruction - See MCDisassembler. 
- bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. EDInstInfo *getEDInfo() const; @@ -127,24 +129,24 @@ private: /// X86_16Disassembler - 16-bit X86 disassembler. class X86_16Disassembler : public X86GenericDisassembler { public: - X86_16Disassembler() : - X86GenericDisassembler(MODE_16BIT) { + X86_16Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_16BIT) { } }; /// X86_16Disassembler - 32-bit X86 disassembler. class X86_32Disassembler : public X86GenericDisassembler { public: - X86_32Disassembler() : - X86GenericDisassembler(MODE_32BIT) { + X86_32Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_32BIT) { } }; /// X86_16Disassembler - 64-bit X86 disassembler. class X86_64Disassembler : public X86GenericDisassembler { public: - X86_64Disassembler() : - X86GenericDisassembler(MODE_64BIT) { + X86_64Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_64BIT) { } }; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index de1610b..f9b0fe5 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -58,8 +58,8 @@ static InstructionContext contextForAttrs(uint8_t attrMask) { * @return - TRUE if the ModR/M byte is required, FALSE otherwise. */ static int modRMRequired(OpcodeType type, - InstructionContext insnContext, - uint8_t opcode) { + InstructionContext insnContext, + uint8_t opcode) { const struct ContextDecision* decision = 0; switch (type) { @@ -391,7 +391,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -406,12 +406,14 @@ static int readPrefixes(struct InternalInstruction* insn) { consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ - - insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vexPrefix[2]) << 3) - | (rFromVEX2of3(insn->vexPrefix[1]) << 2) - | (xFromVEX2of3(insn->vexPrefix[1]) << 1) - | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + } switch (ppFromVEX3of3(insn->vexPrefix[2])) { @@ -433,7 +435,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { @@ -444,8 +446,10 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); - insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + } switch (ppFromVEX2of2(insn->vexPrefix[1])) { @@ -700,34 +704,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) 
{ } /* - * is64BitEquivalent - Determines whether two instruction names refer to - * equivalent instructions but one is 64-bit whereas the other is not. - * - * @param orig - The instruction that is not 64-bit - * @param equiv - The instruction that is 64-bit - */ -static BOOL is64BitEquivalent(const char* orig, const char* equiv) { - off_t i; - - for (i = 0;; i++) { - if (orig[i] == '\0' && equiv[i] == '\0') - return TRUE; - if (orig[i] == '\0' || equiv[i] == '\0') - return FALSE; - if (orig[i] != equiv[i]) { - if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') - continue; - if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') - continue; - if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') - continue; - return FALSE; - } - } -} - - -/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. @@ -763,8 +739,6 @@ static int getID(struct InternalInstruction* insn) { break; } - if (wFromVEX3of3(insn->vexPrefix[2])) - attrMask |= ATTR_REXW; if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } @@ -789,63 +763,55 @@ static int getID(struct InternalInstruction* insn) { } } else { - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; - } + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ - if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { /* - * Although for SSE instructions it is usually necessary to treat REX.W+F2 - * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is - * an occasional instruction where F2 is incidental and REX.W is the more - * significant. If the decoded instruction is 32-bit and adding REX.W - * instead of F2 changes a 32 to a 64, we adopt the new encoding. + * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit + * has precedence since there are no L-bit with W-bit entries in the tables. + * So if the L-bit isn't significant we should use the W-bit instead. */ - + const struct InstructionSpecifier *spec; - uint16_t instructionIDWithREXw; - const struct InstructionSpecifier *specWithREXw; - + uint16_t instructionIDWithWBit; + const struct InstructionSpecifier *specWithWBit; + spec = specifierForUID(instructionID); - - if (getIDWithAttrMask(&instructionIDWithREXw, + + if (getIDWithAttrMask(&instructionIDWithWBit, insn, - attrMask & (~ATTR_XD))) { - /* - * Decoding with REX.w would yield nothing; give up and return original - * decode. 
- */ - + (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = spec; return 0; } - - specWithREXw = specifierForUID(instructionIDWithREXw); - - if (is64BitEquivalent(spec->name, specWithREXw->name)) { - insn->instructionID = instructionIDWithREXw; - insn->spec = specWithREXw; + + specWithWBit = specifierForUID(instructionIDWithWBit); + + if (instructionID != instructionIDWithWBit) { + insn->instructionID = instructionIDWithWBit; + insn->spec = specWithWBit; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } - + if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -885,6 +851,43 @@ static int getID(struct InternalInstruction* insn) { } return 0; } + + if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && + insn->rexPrefix & 0x01) { + /* + * NOOP shouldn't decode as NOOP if REX.b is set. Instead + * it should decode as XCHG %r8, %eax. + */ + + const struct InstructionSpecifier *spec; + uint16_t instructionIDWithNewOpcode; + const struct InstructionSpecifier *specWithNewOpcode; + + spec = specifierForUID(instructionID); + + /* Borrow opcode from one of the other XCHGar opcodes */ + insn->opcode = 0x91; + + if (getIDWithAttrMask(&instructionIDWithNewOpcode, + insn, + attrMask)) { + insn->opcode = 0x90; + + insn->instructionID = instructionID; + insn->spec = spec; + return 0; + } + + specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); + + /* Change back */ + insn->opcode = 0x90; + + insn->instructionID = instructionIDWithNewOpcode; + insn->spec = specWithNewOpcode; + + return 0; + } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); @@ -1434,11 +1437,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { } /* - * readVVVV - Consumes an immediate operand from an instruction, given the - * desired operand size. + * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. * * @param insn - The instruction whose operand is to be read. - * @return - 0 if the immediate was successfully consumed; nonzero + * @return - 0 if the vvvv was successfully consumed; nonzero * otherwise. */ static int readVVVV(struct InternalInstruction* insn) { @@ -1451,6 +1453,9 @@ static int readVVVV(struct InternalInstruction* insn) { else return -1; + if (insn->mode != MODE_64BIT) + insn->vvvv &= 0x7; + return 0; } @@ -1463,8 +1468,14 @@ static int readVVVV(struct InternalInstruction* insn) { */ static int readOperands(struct InternalInstruction* insn) { int index; + int hasVVVV, needVVVV; dbgprintf(insn, "readOperands()"); + + /* If non-zero vvvv specified, need to make sure one of the operands + uses it. */ + hasVVVV = !readVVVV(insn); + needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { switch (insn->spec->operands[index].encoding) { @@ -1537,7 +1548,8 @@ static int readOperands(struct InternalInstruction* insn) { return -1; break; case ENCODING_VVVV: - if (readVVVV(insn)) + needVVVV = 0; /* Mark that we have found a VVVV operand. 
*/ + if (!hasVVVV) return -1; if (fixupReg(insn, &insn->spec->operands[index])) return -1; @@ -1549,6 +1561,9 @@ static int readOperands(struct InternalInstruction* insn) { return -1; } } + + /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ + if (needVVVV) return -1; return 0; } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 70315ed..8b79335 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -81,12 +81,18 @@ enum attributeBits { "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ "but not the operands") \ + ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ "secondary") \ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ + ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ "opcode") \ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ @@ -104,7 +110,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ - ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt deleted file mode 100644 index 033973e..0000000 --- a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86AsmPrinter - X86ATTInstPrinter.cpp - X86IntelInstPrinter.cpp - X86InstComments.cpp - ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile deleted file mode 100644 index c82aa33..0000000 --- a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86AsmPrinter - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index c37d879..029d491 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -39,14 +39,17 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, OS << '%' << getRegisterName(RegNo); } -void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { +void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot) { // Try to print any aliases first. if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) + if (CommentStream) { + printAnnotation(OS, Annot); EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + } } StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { @@ -90,7 +93,8 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { - O << '$' << Op.getImm(); + // Print X86 immediates as signed values. + O << '$' << (int64_t)Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 5426e5c..0293869 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -25,7 +25,7 @@ public: X86ATTInstPrinter(const MCAsmInfo &MAI); virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen, returns true if we successfully printed an diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index 4e28dfe..8d85b95 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -14,9 +14,9 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "Utils/X86ShuffleDecode.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" -#include "../Utils/X86ShuffleDecode.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -136,9 +136,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPDrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::SHUFPDrmi: DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); break; case X86::SHUFPSrri: @@ -205,6 +207,31 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHPMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERMILPSri: + DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPSYri: + DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPDri: + DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPDYri: + DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERM2F128rr: + DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; } diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 506e26c..f9ab5ae 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -32,12 +32,15 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { +void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot) { printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) + if (CommentStream) { + printAnnotation(OS, Annot); EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + } } StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index e84a194..6d5ec62 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -27,7 +27,7 @@ public: : MCInstPrinter(MAI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index ca88f8f..0000000 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMX86Desc - X86MCTargetDesc.cpp - X86MCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
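The deleted X86 MCTargetDesc and InstPrinter build files above follow the same MC-layer consolidation already applied to SystemZ earlier in this diff: each target now exposes a single LLVMInitialize<Target>TargetMC() entry point that registers its MC factories with the TargetRegistry that moved to llvm/Support/TargetRegistry.h, and the relocation/code-model defaults move out of the TargetMachine constructor into an MCCodeGenInfo factory. What follows is a minimal sketch of that pattern, not code from this change; the Foo target, FooMCAsmInfo, TheFooTarget and the createFoo* factories are hypothetical placeholders mirroring the SystemZ hunk shown above.

#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

// Hypothetical pieces assumed to exist for this sketch.
extern Target TheFooTarget;   // normally defined in the target's TargetInfo library
class FooMCAsmInfo;           // an MCAsmInfo subclass, defined elsewhere
// Factory bodies omitted; they follow the same shape as the SystemZ ones above.
MCInstrInfo *createFooMCInstrInfo();
MCRegisterInfo *createFooMCRegisterInfo(StringRef TT);
MCSubtargetInfo *createFooMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS);

static MCCodeGenInfo *createFooMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                             CodeModel::Model CM) {
  MCCodeGenInfo *X = new MCCodeGenInfo();
  if (RM == Reloc::Default)
    RM = Reloc::Static;        // same default the SystemZ factory above picks
  X->InitMCCodeGenInfo(RM, CM);
  return X;
}

extern "C" void LLVMInitializeFooTargetMC() {
  // Register the MC asm info.
  RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);

  // Register the MC codegen info (relocation and code model).
  TargetRegistry::RegisterMCCodeGenInfo(TheFooTarget, createFooMCCodeGenInfo);

  // Register the instruction, register and subtarget info factories.
  TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
  TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
  TargetRegistry::RegisterMCSubtargetInfo(TheFooTarget, createFooMCSubtargetInfo);
}

The TargetMachine side then recovers these models through getRelocationModel() and getCodeModel(), which query CodeGenInfo instead of the removed command-line globals, as the TargetMachine.cpp hunk earlier in this diff shows.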
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile deleted file mode 100644 index b19774e..0000000 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 9b556a5..69ad7d7 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "X86.h" -#include "X86FixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -24,9 +24,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; // Option to allow disabling arithmetic relaxation to workaround PR9807, which @@ -63,10 +62,10 @@ public: : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {} }; -class X86AsmBackend : public TargetAsmBackend { +class X86AsmBackend : public MCAsmBackend { public: X86AsmBackend(const Target &T) - : TargetAsmBackend() {} + : MCAsmBackend() {} unsigned getNumFixupKinds() const { return X86::NumTargetFixupKinds; @@ -81,7 +80,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -94,6 +93,14 @@ public: assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); + + // Check that uppper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers. 
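+      // Concretely, isIntN(Size * 8 + 1, Value) accepts, e.g., both 0xFF and
+      // -1 for a 1-byte fixup, but rejects any value whose significant bits
+      // do not fit in Size bytes plus a sign bit.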
+ assert(isIntN(Size * 8 + 1, Value) && + "Value does not fit in the Fixup field"); + for (unsigned i = 0; i != Size; ++i) Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } @@ -426,8 +433,7 @@ public: } // end anonymous namespace -TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) @@ -439,8 +445,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, return new ELFX86_32AsmBackend(T, TheTriple.getOS()); } -TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h new file mode 100644 index 0000000..e6ba705 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -0,0 +1,548 @@ +//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the X86 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef X86BASEINFO_H +#define X86BASEINFO_H + +#include "X86MCTargetDesc.h" +#include "llvm/Support/DataTypes.h" +#include <cassert> + +namespace llvm { + +namespace X86 { + // Enums for memory operand decoding. Each memory operand is represented with + // a 5 operand sequence in the form: + // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] + // These enums help decode this. + enum { + AddrBaseReg = 0, + AddrScaleAmt = 1, + AddrIndexReg = 2, + AddrDisp = 3, + + /// AddrSegmentReg - The operand # of the segment in the memory operand. + AddrSegmentReg = 4, + + /// AddrNumOperands - Total number of operands in a memory reference. + AddrNumOperands = 5 + }; +} // end namespace X86; + + +/// X86II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace X86II { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // X86 Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a + /// relocation of: + /// SYMBOL_LABEL + [. - PICBASELABEL] + MO_GOT_ABSOLUTE_ADDRESS, + + /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the + /// immediate should get the value of the symbol minus the PIC base label: + /// SYMBOL_LABEL - PICBASELABEL + MO_PIC_BASE_OFFSET, + + /// MO_GOT - On a symbol operand this indicates that the immediate is the + /// offset to the GOT entry for the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. 
+ /// SYMBOL_LABEL @GOT + MO_GOT, + + /// MO_GOTOFF - On a symbol operand this indicates that the immediate is + /// the offset to the location of the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTOFF + MO_GOTOFF, + + /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is + /// offset to the GOT entry for the symbol name from the current code + /// location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTPCREL + MO_GOTPCREL, + + /// MO_PLT - On a symbol operand this indicates that the immediate is + /// offset to the PLT entry of symbol name from the current code location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @PLT + MO_PLT, + + /// MO_TLSGD - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSGD + MO_TLSGD, + + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @GOTTPOFF + MO_GOTTPOFF, + + /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @INDNTPOFF + MO_INDNTPOFF, + + /// MO_TPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TPOFF + MO_TPOFF, + + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @NTPOFF + MO_NTPOFF, + + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "__imp_FOO" symbol. This is used for + /// dllimport linkage on windows. + MO_DLLIMPORT, + + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" symbol. This is used for calls + /// and jumps to external functions on Tiger and earlier. + MO_DARWIN_STUB, + + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a + /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY, + + /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates + /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is + /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY_PIC_BASE, + + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this + /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", + /// which is a PIC-base-relative reference to a hidden dyld lazy pointer + /// stub. + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, + + /// MO_TLVP - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// This is the TLS offset for the Darwin TLS mechanism. + MO_TLVP, + + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate + /// is some TLS offset from the picbase. + /// + /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. 
+ MO_TLVP_PIC_BASE + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for X86 + // instructions. + // + + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// Raw - This form is for instructions that don't have any operands, so + /// they are just a fixed opcode value, like 'leave'. + RawFrm = 1, + + /// AddRegFrm - This form is used for instructions like 'push r32' that have + /// their one register operand added to their opcode. + AddRegFrm = 2, + + /// MRMDestReg - This form is used for instructions that use the Mod/RM byte + /// to specify a destination, which in this case is a register. + /// + MRMDestReg = 3, + + /// MRMDestMem - This form is used for instructions that use the Mod/RM byte + /// to specify a destination, which in this case is memory. + /// + MRMDestMem = 4, + + /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte + /// to specify a source, which in this case is a register. + /// + MRMSrcReg = 5, + + /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte + /// to specify a source, which in this case is memory. + /// + MRMSrcMem = 6, + + /// MRM[0-7][rm] - These forms are used to represent instructions that use + /// a Mod/RM byte, and use the middle field to hold extended opcode + /// information. In the intel manual these are represented as /0, /1, ... + /// + + // First, instructions that operate on a register r/m operand... + MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3 + MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7 + + // Next, instructions that operate on a memory r/m operand... + MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3 + MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7 + + // MRMInitReg - This form is used for instructions whose source and + // destinations are the same register. + MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, + + /// RawFrmImm8 - This is used for the ENTER instruction, which has two + /// immediates, the first of which is a 16-bit immediate (specified by + /// the imm encoding) and the second is a 8-bit fixed value. + RawFrmImm8 = 43, + + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two + /// immediates, the first of which is a 16 or 32-bit immediate (specified by + /// the imm encoding) and the second is a 16-bit fixed value. In the AMD + /// manual, this operand is described as pntr16:32 and pntr16:16 + RawFrmImm16 = 44, + + FormMask = 63, + + //===------------------------------------------------------------------===// + // Actual flags... + + // OpSize - Set if this instruction requires an operand size prefix (0x66), + // which most often indicates that the instruction operates on 16 bit data + // instead of 32 bit data. + OpSize = 1 << 6, + + // AsSize - Set if this instruction requires an operand size prefix (0x67), + // which most often indicates that the instruction address 16 bit address + // instead of 32 bit address (or 32 bit address in 64 bit mode). 
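+    // (In addition to the AdSize flag, the emitter forces a 0x67 address-size
+    // prefix for a 32-bit memory operand in 64-bit mode; see Is32BitMemOperand
+    // and EmitOpcodePrefix in X86MCCodeEmitter.cpp.)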
+ AdSize = 1 << 7, + + //===------------------------------------------------------------------===// + // Op0Mask - There are several prefix bytes that are used to form two byte + // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is + // used to obtain the setting of this field. If no bits in this field is + // set, there is no prefix byte for obtaining a multibyte opcode. + // + Op0Shift = 8, + Op0Mask = 0x1F << Op0Shift, + + // TB - TwoByte - Set if this instruction has a two byte opcode, which + // starts with a 0x0F byte before the real opcode. + TB = 1 << Op0Shift, + + // REP - The 0xF3 prefix byte indicating repetition of the following + // instruction. + REP = 2 << Op0Shift, + + // D8-DF - These escape opcodes are used by the floating point unit. These + // values must remain sequential. + D8 = 3 << Op0Shift, D9 = 4 << Op0Shift, + DA = 5 << Op0Shift, DB = 6 << Op0Shift, + DC = 7 << Op0Shift, DD = 8 << Op0Shift, + DE = 9 << Op0Shift, DF = 10 << Op0Shift, + + // XS, XD - These prefix codes are for single and double precision scalar + // floating point operations performed in the SSE registers. + XD = 11 << Op0Shift, XS = 12 << Op0Shift, + + // T8, TA, A6, A7 - Prefix after the 0x0F prefix. + T8 = 13 << Op0Shift, TA = 14 << Op0Shift, + A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, + + // TF - Prefix before and after 0x0F + TF = 17 << Op0Shift, + + //===------------------------------------------------------------------===// + // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. + // They are used to specify GPRs and SSE registers, 64-bit operand size, + // etc. We only cares about REX.W and REX.R bits and only the former is + // statically determined. + // + REXShift = Op0Shift + 5, + REX_W = 1 << REXShift, + + //===------------------------------------------------------------------===// + // This three-bit field describes the size of an immediate operand. Zero is + // unused so that we can tell if we forgot to set a value. + ImmShift = REXShift + 1, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm16PCRel = 4 << ImmShift, + Imm32 = 5 << ImmShift, + Imm32PCRel = 6 << ImmShift, + Imm64 = 7 << ImmShift, + + //===------------------------------------------------------------------===// + // FP Instruction Classification... Zero is non-fp instruction. + + // FPTypeMask - Mask for all of the FP types... + FPTypeShift = ImmShift + 3, + FPTypeMask = 7 << FPTypeShift, + + // NotFP - The default, set for instructions that do not use FP registers. + NotFP = 0 << FPTypeShift, + + // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0 + ZeroArgFP = 1 << FPTypeShift, + + // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst + OneArgFP = 2 << FPTypeShift, + + // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a + // result back to ST(0). For example, fcos, fsqrt, etc. + // + OneArgFPRW = 3 << FPTypeShift, + + // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an + // explicit argument, storing the result to either ST(0) or the implicit + // argument. For example: fadd, fsub, fmul, etc... + TwoArgFP = 4 << FPTypeShift, + + // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an + // explicit argument, but have no destination. Example: fucom, fucomi, ... + CompareFP = 5 << FPTypeShift, + + // CondMovFP - "2 operand" floating point conditional move instructions. 
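+    // For example: fcmovb, fcmove, fcmovnbe.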
+ CondMovFP = 6 << FPTypeShift, + + // SpecialFP - Special instruction forms. Dispatch by opcode explicitly. + SpecialFP = 7 << FPTypeShift, + + // Lock prefix + LOCKShift = FPTypeShift + 3, + LOCK = 1 << LOCKShift, + + // Segment override prefixes. Currently we just need ability to address + // stuff in gs and fs segments. + SegOvrShift = LOCKShift + 1, + SegOvrMask = 3 << SegOvrShift, + FS = 1 << SegOvrShift, + GS = 2 << SegOvrShift, + + // Execution domain for SSE instructions in bits 23, 24. + // 0 in bits 23-24 means normal, non-SSE instruction. + SSEDomainShift = SegOvrShift + 2, + + OpcodeShift = SSEDomainShift + 2, + + //===------------------------------------------------------------------===// + /// VEX - The opcode prefix used by AVX instructions + VEXShift = OpcodeShift + 8, + VEX = 1U << 0, + + /// VEX_W - Has a opcode specific functionality, but is used in the same + /// way as REX_W is for regular SSE instructions. + VEX_W = 1U << 1, + + /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2 + /// address instructions in SSE are represented as 3 address ones in AVX + /// and the additional register is encoded in VEX_VVVV prefix. + VEX_4V = 1U << 2, + + /// VEX_I8IMM - Specifies that the last register used in a AVX instruction, + /// must be encoded in the i8 immediate field. This usually happens in + /// instructions with 4 operands. + VEX_I8IMM = 1U << 3, + + /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + /// instruction uses 256-bit wide registers. This is usually auto detected + /// if a VR256 register is used, but some AVX instructions also have this + /// field marked when using a f256 memory references. + VEX_L = 1U << 4, + + // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX + // prefix. Usually used for scalar instructions. Needed by disassembler. + VEX_LIG = 1U << 5, + + /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the + /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents + /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction + /// storing a classifier in the imm8 field. To simplify our implementation, + /// we handle this by storeing the classifier in the opcode field and using + /// this flag to indicate that the encoder should do the wacky 3DNow! thing. + Has3DNow0F0FOpcode = 1U << 6 + }; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { + return TSFlags >> X86II::OpcodeShift; + } + + static inline bool hasImm(uint64_t TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field + /// of the specified instruction. + static inline unsigned getSizeOfImm(uint64_t TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: + case X86II::Imm16PCRel: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. 
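+  ///
+  /// Together with hasImm and getSizeOfImm this gives the usual emitter query
+  /// (usage sketch):
+  ///   if (X86II::hasImm(TSFlags)) {
+  ///     unsigned ImmSize = X86II::getSizeOfImm(TSFlags); // 1, 2, 4 or 8 bytes
+  ///     bool IsPCRel = X86II::isImmPCRel(TSFlags) != 0;
+  ///   }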
+ static inline unsigned isImmPCRel(uint64_t TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm16PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } + + /// getMemoryOperandNo - The function returns the MCInst operand # for the + /// first field of the memory operand. If the instruction doesn't have a + /// memory operand, this returns -1. + /// + /// Note that this ignores tied operands. If there is a tied register which + /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only + /// counted as one operand. + /// + static inline int getMemoryOperandNo(uint64_t TSFlags) { + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); + default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); + case X86II::Pseudo: + case X86II::RawFrm: + case X86II::AddRegFrm: + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + case X86II::RawFrmImm8: + case X86II::RawFrmImm16: + return -1; + case X86II::MRMDestMem: + return 0; + case X86II::MRMSrcMem: { + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + unsigned FirstMemOp = 1; + if (HasVEX_4V) + ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + + // FIXME: Maybe lea should have its own form? This is a horrible hack. + //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + // Opcode == X86::LEA16r || Opcode == X86::LEA32r) + return FirstMemOp; + } + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + return -1; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + return 0; + case X86II::MRM_C1: + case X86II::MRM_C2: + case X86II::MRM_C3: + case X86II::MRM_C4: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + case X86II::MRM_F8: + case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: + return -1; + } + } + + /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or + /// higher) register? e.g. r8, xmm8, xmm13, etc. 
+ static inline bool isX86_64ExtendedReg(unsigned RegNo) { + switch (RegNo) { + default: break; + case X86::R8: case X86::R9: case X86::R10: case X86::R11: + case X86::R12: case X86::R13: case X86::R14: case X86::R15: + case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: + case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: + case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: + case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: + case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: + case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: + case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: + return true; + } + return false; + } + + static inline bool isX86_64NonExtLowByteReg(unsigned reg) { + return (reg == X86::SPL || reg == X86::BPL || + reg == X86::SIL || reg == X86::DIL); + } +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index 17d242a..17d242a 100644 --- a/contrib/llvm/lib/Target/X86/X86FixupKinds.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index ce8ef49..2eee112 100644 --- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -12,12 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "X86.h" -#include "X86InstrInfo.h" -#include "X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +47,7 @@ public: } static unsigned GetX86RegNum(const MCOperand &MO) { - return X86RegisterInfo::getX86RegNum(MO.getReg()); + return X86_MC::getX86RegNum(MO.getReg()); } // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range @@ -159,9 +161,11 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); - - if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) || - (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg()))) + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) return true; return false; } @@ -191,11 +195,11 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { const MCExpr *Expr = NULL; if (DispOp.isImm()) { - // If this is a simple integer 
displacement that doesn't require a relocation, - // emit it now. + // If this is a simple integer displacement that doesn't require a + // relocation, emit it now. if (FixupKind != FK_PCRel_1 && - FixupKind != FK_PCRel_2 && - FixupKind != FK_PCRel_4) { + FixupKind != FK_PCRel_2 && + FixupKind != FK_PCRel_4) { EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } @@ -205,7 +209,9 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, } // If we have an immoffset, add it to the expression. - if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) { + if ((FixupKind == FK_Data_4 || + FixupKind == MCFixupKind(X86::reloc_signed_4byte)) && + StartsWithGlobalOffsetTable(Expr)) { assert(ImmOffset == 0); FixupKind = MCFixupKind(X86::reloc_global_offset_table); @@ -346,7 +352,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, } // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { @@ -486,71 +492,100 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_L = 1; } - unsigned NumOps = MI.getNumOperands(); + // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned CurOp = 0; - bool IsDestMem = false; - switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); - case X86II::MRMDestMem: - IsDestMem = true; - // The important info for the VEX prefix is never beyond the address - // registers. Don't check beyond that. - NumOps = CurOp = X86::AddrNumOperands; + case X86II::MRMDestMem: { + // MRMDestMem instructions forms: + // MemAddr, src1(ModR/M) + // MemAddr, src1(VEX_4V), src2(ModR/M) + // MemAddr, src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + CurOp = X86::AddrNumOperands; + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + break; + } + case X86II::MRMSrcMem: { + // MRMSrcMem instructions forms: + // src1(ModR/M), MemAddr + // src1(ModR/M), src2(VEX_4V), MemAddr + // src1(ModR/M), MemAddr, imm8 + // src1(ModR/M), MemAddr, src2(VEX_I8IMM) + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_R = 0x0; + + unsigned MemAddrOffset = 1; + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, 1); + MemAddrOffset++; + } + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; + } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMSrcMem: + // MRM[0-9]m instructions forms: + // MemAddr + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; case X86II::MRMSrcReg: - if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + // MRMSrcReg instructions forms: + // 
dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M), src1(ModR/M) + // dst(ModR/M), src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; - if (HasVEX_4V) { - VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp); - CurOp++; - } - - // To only check operands before the memory address ones, start - // the search from the beginning - if (IsDestMem) - CurOp = 0; - - // If the last register should be encoded in the immediate field - // do not use any bit from VEX prefix to this register, ignore it - if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) - NumOps--; - - for (; CurOp != NumOps; ++CurOp) { - const MCOperand &MO = MI.getOperand(CurOp); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_B = 0x0; - if (!VEX_B && MO.isReg() && - ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) && - X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_X = 0x0; - } - break; - default: // MRMDestReg, MRM0r-MRM7r, RawFrm - if (!MI.getNumOperands()) - break; - - if (MI.getOperand(CurOp).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) - VEX_B = 0; - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); - - CurOp++; - for (; CurOp != NumOps; ++CurOp) { - const MCOperand &MO = MI.getOperand(CurOp); - if (MO.isReg() && !HasVEX_4V && - X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_R = 0x0; - } + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: + // dst(ModR/M), src(ModR/M) + // dst(ModR/M), src(ModR/M), imm8 + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_R = 0x0; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 + VEX_4V = getVEXRegisterEncoding(MI, 0); + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_B = 0x0; + break; + default: // RawFrm break; } @@ -604,7 +639,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; + if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. REX |= 0x40; // REX fixed encoding prefix @@ -615,25 +650,25 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 2; // set REX.R i = isTwoAddr ? 
2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << 0; // set REX.B } break; case X86II::MRMSrcMem: { if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 2; // set REX.R unsigned Bit = 0; i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { - if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1) Bit++; } @@ -648,13 +683,13 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 1 : 0; if (NumOps > e && MI.getOperand(e).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg())) REX |= 1 << 2; // set REX.R unsigned Bit = 0; for (; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { - if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1) Bit++; } @@ -663,12 +698,12 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } default: if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 0; // set REX.B i = isTwoAddr ? 2 : 1; for (unsigned e = NumOps; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << 2; // set REX.R } break; @@ -731,7 +766,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags & X86II::AdSize) || (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) EmitByte(0x67, CurByte, OS); - + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); @@ -834,7 +869,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; - // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -844,12 +878,11 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); - unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); - + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) BaseOpcode = 0x0F; // Weird 3DNow! encoding. - + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: @@ -861,7 +894,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; - case X86II::RawFrmImm8: EmitByte(BaseOpcode, CurByte, OS); EmitImmediate(MI.getOperand(CurOp++), @@ -1006,8 +1038,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. 
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); - bool IsExtReg = - X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); + bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); unsigned RegNum = (IsExtReg ? (1 << 7) : 0); RegNum |= GetX86RegNum(MO) << 4; EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, @@ -1030,7 +1061,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); - #ifndef NDEBUG // FIXME: Verify. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index b77f37b..f98d5e3 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -13,12 +13,18 @@ #include "X86MCTargetDesc.h" #include "X86MCAsmInfo.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" #define GET_REGINFO_MC_DESC #include "X86GenRegisterInfo.inc" @@ -34,9 +40,16 @@ using namespace llvm; std::string X86_MC::ParseX86Triple(StringRef TT) { Triple TheTriple(TT); + std::string FS; if (TheTriple.getArch() == Triple::x86_64) - return "+64bit-mode"; - return "-64bit-mode"; + FS = "+64bit-mode"; + else + FS = "-64bit-mode"; + if (TheTriple.getOS() == Triple::NativeClient) + FS += ",+nacl-mode"; + else + FS += ",-nacl-mode"; + return FS; } /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the @@ -107,6 +120,135 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, } } +unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::x86_64) + return DWARFFlavour::X86_64; + + if (TheTriple.isOSDarwin()) + return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic; + if (TheTriple.getOS() == Triple::MinGW32 || + TheTriple.getOS() == Triple::Cygwin) + // Unsupported by now, just quick fallback + return DWARFFlavour::X86_32_Generic; + return DWARFFlavour::X86_32_Generic; +} + +/// getX86RegNum - This function maps LLVM register identifiers to their X86 +/// specific numbering, which is used in various places encoding instructions. 
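+/// For example, X86::ECX, X86::R9 and X86::R9B all return N86::ECX (1); what
+/// distinguishes the extended registers in the final encoding is a REX (or
+/// VEX) bit, as tested by X86II::isX86_64ExtendedReg.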
+unsigned X86_MC::getX86RegNum(unsigned RegNo) { + switch(RegNo) { + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; + case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; + case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: + return N86::ESP; + case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: + return N86::EBP; + case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: + return N86::ESI; + case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: + return N86::EDI; + + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + return N86::EAX; + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + return N86::ECX; + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + return N86::EDX; + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + return N86::EBX; + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + return N86::ESP; + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + return N86::EBP; + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + return N86::ESI; + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + return N86::EDI; + + case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: + case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: + return RegNo-X86::ST0; + + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: + return 0; + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: + return 1; + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: + return 2; + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: + return 3; + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: + return 4; + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: + return 5; + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: + return 6; + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: + return 7; + + case X86::ES: return 0; + case X86::CS: return 1; + case X86::SS: return 2; + case X86::DS: return 3; + case X86::FS: return 4; + case X86::GS: return 5; + + case X86::CR0: case X86::CR8 : case X86::DR0: return 0; + case X86::CR1: case X86::CR9 : case X86::DR1: return 1; + case X86::CR2: case X86::CR10: case X86::DR2: return 2; + case X86::CR3: case X86::CR11: case X86::DR3: return 3; + case X86::CR4: case X86::CR12: case X86::DR4: return 4; + case X86::CR5: case X86::CR13: case X86::DR5: return 5; + case X86::CR6: case X86::CR14: case X86::DR6: return 6; + case X86::CR7: case X86::CR15: case X86::DR7: return 7; + + // Pseudo index registers are equivalent to a "none" + // scaled index (See Intel Manual 2A, table 2-3) + case X86::EIZ: + case X86::RIZ: + return 4; + + default: + assert((int(RegNo) > 0) && "Unknown physical register!"); + return 0; + } +} + +void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) { + // FIXME: TableGen these. 
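+  // For each register the SEH number is its X86 encoding from getX86RegNum,
+  // with the extended registers (R8-R15, XMM8-15, YMM8-15) biased by 8.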
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) { + int SEH = X86_MC::getX86RegNum(Reg); + switch (Reg) { + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + SEH += 8; + break; + } + MRI->mapLLVMRegToSEHReg(Reg, SEH); + } +} + MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { std::string ArchFS = X86_MC::ParseX86Triple(TT); @@ -131,55 +273,191 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -// Force static initialization. -extern "C" void LLVMInitializeX86MCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, - X86_MC::createX86MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, - X86_MC::createX86MCSubtargetInfo); -} - static MCInstrInfo *createX86MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitX86MCInstrInfo(X); return X; } -extern "C" void LLVMInitializeX86MCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); -} +static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { + Triple TheTriple(TT); + unsigned RA = (TheTriple.getArch() == Triple::x86_64) + ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. -static MCRegisterInfo *createX86MCRegisterInfo() { MCRegisterInfo *X = new MCRegisterInfo(); - InitX86MCRegisterInfo(X); + InitX86MCRegisterInfo(X, RA, + X86_MC::getDwarfRegFlavour(TT, false), + X86_MC::getDwarfRegFlavour(TT, true)); + X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } -extern "C" void LLVMInitializeX86MCRegInfo() { - TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); -} - - static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); + bool is64Bit = TheTriple.getArch() == Triple::x86_64; + MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { - if (TheTriple.getArch() == Triple::x86_64) - return new X86_64MCAsmInfoDarwin(TheTriple); + if (is64Bit) + MAI = new X86_64MCAsmInfoDarwin(TheTriple); else - return new X86MCAsmInfoDarwin(TheTriple); + MAI = new X86MCAsmInfoDarwin(TheTriple); + } else if (TheTriple.isOSWindows()) { + MAI = new X86MCAsmInfoCOFF(TheTriple); + } else { + MAI = new X86ELFMCAsmInfo(TheTriple); } + // Initialize initial frame state. + // Calculate amount of bytes used for return address storing + int stackGrowth = is64Bit ? -8 : -4; + + // Initial state of the frame pointer is esp+stackGrowth. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(is64Bit ? 
X86::RSP : X86::ESP, stackGrowth); + MAI->addInitialFrameState(0, Dst, Src); + + // Add return address to move list + MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth); + MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP); + MAI->addInitialFrameState(0, CSDst, CSSrc); + + return MAI; +} + +static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + Triple T(TT); + bool is64Bit = T.getArch() == Triple::x86_64; + + if (RM == Reloc::Default) { + // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. + // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we + // use static relocation model by default. + if (T.isOSDarwin()) { + if (is64Bit) + RM = Reloc::PIC_; + else + RM = Reloc::DynamicNoPIC; + } else if (T.isOSWindows() && is64Bit) + RM = Reloc::PIC_; + else + RM = Reloc::Static; + } + + // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC + // is defined as a model for code which may be used in static or dynamic + // executables but not necessarily a shared library. On X86-32 we just + // compile in -static mode, in x86-64 we use PIC. + if (RM == Reloc::DynamicNoPIC) { + if (is64Bit) + RM = Reloc::PIC_; + else if (!T.isOSDarwin()) + RM = Reloc::Static; + } + + // If we are on Darwin, disallow static relocation model in X86-64 mode, since + // the Mach-O file format doesn't support it. + if (RM == Reloc::Static && T.isOSDarwin() && is64Bit) + RM = Reloc::PIC_; + + // For static codegen, if we're not already set, use Small codegen. + if (CM == CodeModel::Default) + CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) + // 64-bit JIT places everything in the same buffer except external funcs. + CM = is64Bit ? CodeModel::Large : CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) + return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); + if (TheTriple.isOSWindows()) - return new X86MCAsmInfoCOFF(TheTriple); + return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll); + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createX86MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new X86ATTInstPrinter(MAI); + if (SyntaxVariant == 1) + return new X86IntelInstPrinter(MAI); + return 0; +} - return new X86ELFMCAsmInfo(TheTriple); +static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) { + return new MCInstrAnalysis(Info); } -extern "C" void LLVMInitializeX86MCAsmInfo() { - // Register the target asm info. +// Force static initialization. +extern "C" void LLVMInitializeX86TargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); + + // Register the MC codegen info. + RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo); + RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo); + + // Register the MC instruction info. 
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, + X86_MC::createX86MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, + X86_MC::createX86MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target, + createX86MCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target, + createX86MCInstrAnalysis); + + // Register the code emitter. + TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target, + createX86MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target, + createX86MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheX86_64Target, + createX86_64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target, + createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheX86_32Target, + createX86MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheX86_64Target, + createX86MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 89ea22b..c144c51 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -14,15 +14,39 @@ #ifndef X86MCTARGETDESC_H #define X86MCTARGETDESC_H +#include "llvm/Support/DataTypes.h" #include <string> namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_ostream; extern Target TheX86_32Target, TheX86_64Target; +/// DWARFFlavour - Flavour of dwarf regnumbers +/// +namespace DWARFFlavour { + enum { + X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2 + }; +} + +/// N86 namespace - Native X86 register numbers +/// +namespace N86 { + enum { + EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 + }; +} + namespace X86_MC { std::string ParseX86Triple(StringRef TT); @@ -33,13 +57,32 @@ namespace X86_MC { void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); - /// createARMMCSubtargetInfo - Create a X86 MCSubtargetInfo instance. + unsigned getDwarfRegFlavour(StringRef TT, bool isEH); + + unsigned getX86RegNum(unsigned RegNo); + + void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); + + /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. /// This is exposed so Asm parser, etc. do not need to go through /// TargetRegistry. 
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } +MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT); + +/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 3711038..f0f1982 100644 --- a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "X86.h" -#include "X86FixupKinds.h" -#include "llvm/ADT/Twine.h" +#include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Object/MachOFormat.h" diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp index 08d4d84..52a67f7 100644 --- a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp +++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -9,7 +9,7 @@ #include "X86.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheX86_32Target, llvm::TheX86_64Target; diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt deleted file mode 100644 index 3ad5f99..0000000 --- a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86Utils - X86ShuffleDecode.cpp - ) -add_dependencies(LLVMX86Utils X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile deleted file mode 100644 index 1df6f0f..0000000 --- a/contrib/llvm/lib/Target/X86/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Utils - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index cd06060..aeb3309 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -167,24 +167,77 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; unsigned Start = 0; - unsigned End = NumSectionElts / 2; - for (unsigned s = 0; s < NumSections; ++s) { + unsigned End = NumLaneElts / 2; + for (unsigned s = 0; s < NumLanes; ++s) { for (unsigned i = Start; i != End; ++i) { ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2 + ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2 } // Process the next 128 bits. - Start += NumSectionElts; - End += NumSectionElts; + Start += NumLaneElts; + End += NumLaneElts; } } +// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit +// elements. For 256-bit vectors, it's considered as two 128 lanes, the +// referenced elements can't cross lanes and the mask of the first lane must +// be the same of the second. +void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumLanes = (NumElts*32)/128; + unsigned LaneSize = NumElts/NumLanes; + + for (unsigned l = 0; l != NumLanes; ++l) { + for (unsigned i = 0; i != LaneSize; ++i) { + unsigned Idx = (Imm >> (i*2)) & 0x3 ; + ShuffleMask.push_back(Idx+(l*LaneSize)); + } + } +} + +// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit +// elements. For 256-bit vectors, it's considered as two 128 lanes, the +// referenced elements can't cross lanes but the mask of the first lane can +// be the different of the second (not like VPERMILPS). +void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumLanes = (NumElts*64)/128; + unsigned LaneSize = NumElts/NumLanes; + + for (unsigned l = 0; l < NumLanes; ++l) { + for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) { + unsigned Idx = (Imm >> i) & 0x1; + ShuffleMask.push_back(Idx+(l*LaneSize)); + } + } +} + +void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned HalfSize = VT.getVectorNumElements()/2; + unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; + unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; + + for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) + ShuffleMask.push_back(i); + for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) + ShuffleMask.push_back(i); +} + +void DecodeVPERM2F128Mask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only + // has information about the instruction and not the types. 
So for + // instruction comments purpose, assume the 256-bit vector is v4i64. + return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask); +} + } // llvm namespace diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index b18f670..58193e6 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -82,6 +82,26 @@ void DecodeUNPCKLPDMask(unsigned NElts, void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); + +// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit +// elements. For 256-bit vectors, it's considered as two 128 lanes, the +// referenced elements can't cross lanes and the mask of the first lane must +// be the same of the second. +void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit +// elements. For 256-bit vectors, it's considered as two 128 lanes, the +// referenced elements can't cross lanes but the mask of the first lane can +// be the different of the second (not like VPERMILPS). +void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeVPERM2F128Mask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + } // llvm namespace #endif diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index ec52dfb..81e9422 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -24,16 +25,8 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; class MachineCodeEmitter; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCObjectWriter; -class MCSubtargetInfo; class Target; -class TargetAsmBackend; class X86TargetMachine; -class formatted_raw_ostream; -class raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. @@ -51,22 +44,16 @@ FunctionPass* createGlobalBaseRegPass(); /// FunctionPass *createX86FloatingPointStackifierPass(); -/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain -/// crossings. -FunctionPass *createSSEDomainFixPass(); +/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions +/// before each call to avoid transition penalty between functions encoded with +/// AVX and SSE. +FunctionPass *createX86IssueVZeroUpperPass(); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object. FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &); -TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); - /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically /// allocated chunk of memory. 
@@ -79,13 +66,6 @@ FunctionPass *createEmitX86CodeToMemory(); /// FunctionPass *createX86MaxStackAlignmentHeuristicPass(); - -/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. -MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, - uint32_t CPUSubtype); - } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 4ccb43f..104b91f 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This is a target description file for the Intel i386 architecture, referred to -// here as the "X86" architecture. +// This is a target description file for the Intel i386 architecture, referred +// to here as the "X86" architecture. // //===----------------------------------------------------------------------===// @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; +def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true", + "Native Client mode">; + //===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// @@ -68,6 +71,9 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureCMOV]>; +def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true", + "64-bit with cmpxchg16b", + [Feature64Bit]>; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", @@ -90,6 +96,16 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "Allow unaligned memory operands on vector/SIMD instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions">; +def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", + "Support MOVBE instruction">; +def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", + "Support RDRAND instruction">; +def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", + "Support 16-bit floating point conversion instructions">; +def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", + "Support LZCNT instruction">; +def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", + "Support BMI instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. 
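FeatureCMPXCHG16B, introduced above, replaces the bare Feature64Bit dependency on several processor definitions and gates 16-byte compare-and-swap. For context, this is roughly the kind of C++ source that eventually needs the instruction; the struct and function names are made up for illustration, and whether the atomic is actually lock-free depends on the compiler, the standard library, and targeting a CPU (or -mcx16) that reports cmpxchg16b:

```cpp
#include <atomic>
#include <cstdint>

struct alignas(16) WidePtr {
  std::uint64_t ptr;
  std::uint64_t tag;   // e.g. an ABA counter
};

bool casWide(std::atomic<WidePtr> &slot, WidePtr expected, WidePtr desired) {
  // With cmpxchg16b available this can lower to a single lock cmpxchg16b;
  // otherwise the library may fall back to a lock.
  return slot.compare_exchange_strong(expected, desired);
}

int main() {
  std::atomic<WidePtr> slot{WidePtr{0, 0}};
  return casWide(slot, WidePtr{0, 0}, WidePtr{42, 1}) ? 0 : 1;
}
```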
@@ -112,27 +128,43 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE, + FeatureSlowBTMem]>; // "Arrandale" along with corei3 and corei5 -def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES]>; -def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; +def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>; +def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES, FeatureCLMUL]>; +def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem, FeatureAES, + FeatureCLMUL]>; +// Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. // FIXME: Disabling AVX for now since it's not ready. 
-def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit, +def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, FeatureCLMUL]>; +// Ivy Bridge +def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, + FeatureRDRAND, FeatureF16C]>; + +// Haswell +def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, + FeatureCLMUL, FeatureRDRAND, FeatureF16C, + FeatureFMA3, FeatureMOVBE, FeatureLZCNT, + FeatureBMI]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -150,19 +182,21 @@ def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; + Feature3DNowA, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A, - Feature3DNowA]>; -def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B, + FeatureSSE4A, Feature3DNowA]>; +def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp index 99b4479..4c3ff02 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -35,12 +35,12 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/SmallString.h" using namespace llvm; @@ -504,8 +504,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .indirect_symbol _foo OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(), MCSA_IndirectSymbol); - // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12. - const char HltInsts[] = { -12, -12, -12, -12, -12 }; + // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. 
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } @@ -708,21 +708,8 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // Target Registry Stuff //===----------------------------------------------------------------------===// -static MCInstPrinter *createX86MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI); - if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI); - return 0; -} - // Force static initialization. extern "C" void LLVMInitializeX86AsmPrinter() { RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target); RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target); - - TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp index 4b11db7..aeff03a 100644 --- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -98,8 +98,6 @@ namespace { void emitMemModRMByte(const MachineInstr &MI, unsigned Op, unsigned RegOpcodeField, intptr_t PCAdj = 0); - - unsigned getX86RegNum(unsigned RegNo) const; }; template<class CodeEmitter> @@ -169,7 +167,7 @@ static unsigned determineREX(const MachineInstr &MI) { const MachineOperand& MO = MI.getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); - if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) + if (X86II::isX86_64NonExtLowByteReg(Reg)) REX |= 0x40; } } @@ -346,11 +344,6 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, MCE.emitWordLE(0); } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const { - return X86RegisterInfo::getX86RegNum(RegNo); -} - inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); @@ -360,7 +353,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, template<class CodeEmitter> void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){ - MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); + MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg))); } template<class CodeEmitter> @@ -498,7 +491,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // 2-7) and absolute references. unsigned BaseRegNo = -1U; if (BaseReg != 0 && BaseReg != X86::RIP) - BaseRegNo = getX86RegNum(BaseReg); + BaseRegNo = X86_MC::getX86RegNum(BaseReg); if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && @@ -566,7 +559,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, } // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { @@ -574,15 +567,15 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Manual 2A, table 2-7. The displacement has already been output. 
unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); + IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg); unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); + IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); else IndexRegNo = 4; // For example [ESP+1*<noreg>+4] emitSIBByte(SS, IndexRegNo, BaseRegNo); @@ -809,7 +802,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, } case X86II::AddRegFrm: { - MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); + MCE.emitByte(BaseOpcode + + X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); if (CurOp == NumOps) break; @@ -844,7 +838,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -854,7 +848,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, - getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) + X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) .getReg())); CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) @@ -866,7 +860,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMSrcReg: MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp+1).getReg(), - getX86RegNum(MI.getOperand(CurOp).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -880,8 +874,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, X86II::getSizeOfImm(Desc->TSFlags) : 0; MCE.emitByte(BaseOpcode); - emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), - PCAdj); + emitMemModRMByte(MI, CurOp+1, + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); CurOp += AddrOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -968,7 +962,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). 
emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp index f1d7ede..4a72d15 100644 --- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp @@ -147,7 +147,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) return SymOffset - (RelOffset + 4); else - assert("computeRelocation unknown for this relocation type"); + assert(0 && "computeRelocation unknown for this relocation type"); return 0; } diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 21e163a..f912b28 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -22,6 +22,7 @@ #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" @@ -59,8 +60,8 @@ public: explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); + X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX(); + X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX(); } virtual bool TargetSelectInstruction(const Instruction *I); @@ -134,7 +135,7 @@ private: (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); bool IsMemcpySmall(uint64_t Len); @@ -144,7 +145,7 @@ private: } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { +bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -198,8 +199,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, RC = X86::GR64RegisterClass; break; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::MOVSSrm; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp32m; @@ -207,8 +208,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::MOVSDrm; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp64m; @@ -250,10 +251,12 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { case MVT::i32: Opc = X86::MOV32mr; break; case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. case MVT::f32: - Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m; + Opc = X86ScalarSSEf32 ? + (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; break; case MVT::f64: - Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m; + Opc = X86ScalarSSEf64 ? + (Subtarget->hasAVX() ? 
X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; break; } @@ -336,7 +339,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { U = C; } - if (const PointerType *Ty = dyn_cast<PointerType>(V->getType())) + if (PointerType *Ty = dyn_cast<PointerType>(V->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -399,7 +402,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); continue; @@ -465,14 +468,23 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Handle constant address. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // Can't handle alternate code models or TLS yet. + // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; + // Can't handle TLS yet. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; + // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how + // it works...). + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + if (const GlobalVariable *GVar = + dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false))) + if (GVar->isThreadLocal()) + return false; + // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the // global value into its own register, which we can use as the basereg. @@ -658,6 +670,10 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(const Instruction *I) { + // Atomic stores need special handling. + if (cast<StoreInst>(I)->isAtomic()) + return false; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -780,6 +796,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { + // Atomic loads need special handling. + if (cast<LoadInst>(I)->isAtomic()) + return false; + MVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -797,14 +817,20 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { } static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { + bool HasAVX = Subtarget->hasAVX(); + bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1(); + bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2(); + switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; - case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0; - case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0; + case MVT::f32: + return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0; + case MVT::f64: + return X86ScalarSSEf64 ? (HasAVX ? 
X86::VUCOMISDrr : X86::UCOMISDrr) : 0; } } @@ -1207,7 +1233,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. - if (Subtarget->hasSSE2() && + if (X86ScalarSSEf64 && I->getType()->isDoubleTy()) { const Value *V = I->getOperand(0); if (V->getType()->isFloatTy()) { @@ -1226,7 +1252,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { } bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { - if (Subtarget->hasSSE2()) { + if (X86ScalarSSEf64) { if (I->getType()->isFloatTy()) { const Value *V = I->getOperand(0); if (V->getType()->isDoubleTy()) { @@ -1365,6 +1391,9 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { case Intrinsic::memset: { const MemSetInst &MSI = cast<MemSetInst>(I); + if (MSI.isVolatile()) + return false; + unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth)) return false; @@ -1411,7 +1440,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Replace "add with overflow" intrinsics with an "add" instruction followed // by a seto/setc instruction. const Function *Callee = I.getCalledFunction(); - const Type *RetTy = + Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); MVT VT; @@ -1484,8 +1513,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; - const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); bool isVarArg = FTy->isVarArg(); // Don't know how to handle Win64 varargs yet. Nothing special needed for @@ -1547,8 +1576,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { Flags.setZExt(); if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) { - const PointerType *Ty = cast<PointerType>(ArgVal->getType()); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(ArgVal->getType()); + Type *ElementTy = Ty->getElementType(); unsigned FrameSize = TD.getTypeAllocSize(ElementTy); unsigned FrameAlign = CS.getParamAlignment(AttrInd); if (!FrameAlign) @@ -1600,7 +1629,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (ArgReg == 0) return false; - const Type *ArgTy = ArgVal->getType(); + Type *ArgTy = ArgVal->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1709,7 +1738,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { assert(Res && "memcpy length already checked!"); (void)Res; } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { // If this is a really simple value, emit this with the Value* version - //of X86FastEmitStore. If it isn't simple, we don't want to do this, + // of X86FastEmitStore. If it isn't simple, we don't want to do this, // as it can cause us to reevaluate the argument. X86FastEmitStore(ArgVT, ArgVal, AM); } else { @@ -1965,8 +1994,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { RC = X86::GR64RegisterClass; break; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::MOVSSrm; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? 
X86::VMOVSSrm : X86::MOVSSrm; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp32m; @@ -1974,8 +2003,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::MOVSDrm; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp64m; @@ -2070,8 +2099,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { switch (VT.SimpleTy) { default: return false; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::FsFLD0SS; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp032; @@ -2079,8 +2108,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::FsFLD0SD; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp064; diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index 6eed6abd..e3461c8 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -260,6 +260,21 @@ namespace { BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); } + /// duplicatePendingSTBeforeKill - The instruction at I is about to kill + /// RegNo. If any PendingST registers still need the RegNo value, duplicate + /// them to new scratch registers. + void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) { + for (unsigned i = 0; i != NumPendingSTs; ++i) { + if (PendingST[i] != RegNo) + continue; + unsigned SR = getScratchReg(); + DEBUG(dbgs() << "Duplicating pending ST" << i + << " in FP" << RegNo << " to FP" << SR << '\n'); + duplicateToTop(RegNo, SR, I); + PendingST[i] = SR; + } + } + /// popStackAfter - Pop the current value off of the top of the FP stack /// after the specified instruction. void popStackAfter(MachineBasicBlock::iterator &I); @@ -406,6 +421,10 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI->isCopy() && isFPCopy(MI)) FPInstClass = X86II::SpecialFP; + if (MI->isImplicitDef() && + X86::RFP80RegClass.contains(MI->getOperand(0).getReg())) + FPInstClass = X86II::SpecialFP; + if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! @@ -461,6 +480,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { } dumpStack(); ); + (void)PrevMI; Changed = true; } @@ -969,6 +989,9 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); bool KillsSrc = MI->killsRegister(X86::FP0+Reg); + if (KillsSrc) + duplicatePendingSTBeforeKill(Reg, I); + // FISTP64m is strange because there isn't a non-popping versions. // If we have one _and_ we don't want to pop the operand, duplicate the value // on the stack instead of moving it. This ensure that popping the value is @@ -1032,6 +1055,7 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { bool KillsSrc = MI->killsRegister(X86::FP0+Reg); if (KillsSrc) { + duplicatePendingSTBeforeKill(Reg, I); // If this is the last use of the source register, just make sure it's on // the top of the stack. moveToTop(Reg, I); @@ -1318,6 +1342,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // When the source is killed, allocate a scratch FP register. 
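The X86FastISel hunks above repeatedly apply one selection pattern: treat AVX as implying scalar SSE usability, prefer the VEX-encoded opcode when AVX is present, and fall back to the x87 form only when scalar SSE is unavailable. A minimal sketch of that decision, with placeholder opcode names rather than real LLVM opcode values:

```cpp
#include <cstdio>

enum Opcode { MOVSSrm, VMOVSSrm, LD_Fp32m };

struct SubtargetFlags {
  bool HasSSE1;
  bool HasAVX;
};

static Opcode chooseF32LoadOpcode(const SubtargetFlags &ST) {
  bool ScalarSSEf32 = ST.HasSSE1 || ST.HasAVX;  // AVX implies scalar SSE support
  if (ScalarSSEf32)
    return ST.HasAVX ? VMOVSSrm : MOVSSrm;      // prefer the VEX encoding
  return LD_Fp32m;                              // x87 fallback
}

int main() {
  SubtargetFlags AVXOnly = {false, true};
  std::printf("%d\n", chooseF32LoadOpcode(AVXOnly));  // 1 == VMOVSSrm
  return 0;
}
```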
if (KillsSrc) { + duplicatePendingSTBeforeKill(SrcFP, I); unsigned Slot = getSlot(SrcFP); unsigned SR = getScratchReg(); PendingST[DstST] = SR; @@ -1369,6 +1394,15 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { break; } + case TargetOpcode::IMPLICIT_DEF: { + // All FP registers must be explicitly defined, so load a 0 instead. + unsigned Reg = MI->getOperand(0).getReg() - X86::FP0; + DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n'); + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0)); + pushReg(Reg); + break; + } + case X86::FpPOP_RETVAL: { // The FpPOP_RETVAL instruction is used after calls that return a value on // the floating point stack. We cannot model this with ST defs since CALL diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index ed45a9a..d54f4ae 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -15,6 +15,7 @@ #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -91,12 +92,12 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; static const unsigned CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX + X86::EAX, X86::EDX, X86::ECX, 0 }; static const unsigned CallerSavedRegs64Bit[] = { X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11 + X86::R8, X86::R9, X86::R10, X86::R11, 0 }; unsigned Opc = MBBI->getOpcode(); @@ -283,8 +284,8 @@ static bool isEAXLiveIn(MachineFunction &MF) { } void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { + MCSymbol *Label, + unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -346,6 +347,247 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, } } +/// getCompactUnwindRegNum - Get the compact unwind number for a given +/// register. The number corresponds to the enum lists in +/// compact_unwind_encoding.h. +static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { + int Idx = 1; + for (; *CURegs; ++CURegs, ++Idx) + if (*CURegs == Reg) + return Idx; + + return -1; +} + +/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding +/// used with frameless stacks. It is passed the number of registers to be saved +/// and an array of the registers saved. +static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], + unsigned RegCount, + bool Is64Bit) { + // The saved registers are numbered from 1 to 6. In order to encode the order + // in which they were saved, we re-number them according to their place in the + // register order. The re-numbering is relative to the last re-numbered + // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + static const unsigned CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const unsigned CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const unsigned *CURegs = (Is64Bit ? 
CU64BitRegs : CU32BitRegs); + + uint32_t RenumRegs[6]; + for (unsigned i = 6 - RegCount; i < 6; ++i) { + int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); + if (CUReg == -1) return ~0U; + SavedRegs[i] = CUReg; + + unsigned Countless = 0; + for (unsigned j = 6 - RegCount; j < i; ++j) + if (SavedRegs[j] < SavedRegs[i]) + ++Countless; + + RenumRegs[i] = SavedRegs[i] - Countless - 1; + } + + // Take the renumbered values and encode them into a 10-bit number. + uint32_t permutationEncoding = 0; + switch (RegCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] + + 6 * RenumRegs[3] + 2 * RenumRegs[4] + + RenumRegs[5]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] + + 3 * RenumRegs[4] + RenumRegs[5]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] + + RenumRegs[5]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; + break; + case 1: + permutationEncoding |= RenumRegs[5]; + break; + } + + assert((permutationEncoding & 0x3FF) == permutationEncoding && + "Invalid compact register encoding!"); + return permutationEncoding; +} + +/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a +/// compact encoding with a frame pointer. +static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], + bool Is64Bit) { + static const unsigned CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const unsigned CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); + + // Encode the registers in the order they were saved, 3-bits per register. The + // registers are numbered from 1 to 6. + uint32_t RegEnc = 0; + for (int I = 5; I >= 0; --I) { + unsigned Reg = SavedRegs[I]; + if (Reg == 0) break; + int CURegNum = getCompactUnwindRegNum(CURegs, Reg); + if (CURegNum == -1) + return ~0U; + RegEnc |= (CURegNum & 0x7) << (5 - I); + } + + assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!"); + return RegEnc; +} + +uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + + bool Is64Bit = STI.is64Bit(); + bool HasFP = hasFP(MF); + + unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; + int SavedRegIdx = 6; + + unsigned OffsetSize = (Is64Bit ? 8 : 4); + + unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r); + unsigned PushInstrSize = 1; + unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); + unsigned MoveInstrSize = (Is64Bit ? 3 : 2); + unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta); + unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2); + + unsigned StackDivide = (Is64Bit ? 8 : 4); + + unsigned InstrOffset = 0; + unsigned CFAOffset = 0; + unsigned StackAdjust = 0; + + MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. 
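encodeCompactUnwindRegistersWithoutFrame above renumbers each saved register relative to the smaller registers not yet saved and then packs the result with mixed-radix weights. A standalone sketch of just that arithmetic, hard-coding the four-register case and worked through for the {6, 2, 4, 5} save order from the comment (renumbered to {5, 1, 2, 2}, which encodes to 320):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Regs holds compact-unwind register numbers (1..6) in save order.
static std::uint32_t permuteEncode4(const std::vector<unsigned> &Regs) {
  bool Used[7] = {};
  std::uint32_t Renum[4];
  for (unsigned I = 0; I != 4; ++I) {
    std::uint32_t Lower = 0;
    for (unsigned U = 1; U < Regs[I]; ++U)
      if (!Used[U])
        ++Lower;                    // smaller registers not yet saved
    Used[Regs[I]] = true;
    Renum[I] = Lower;
  }
  // Mixed-radix weights for the four-register case: 6*5*4*3 possible orders.
  return 60 * Renum[0] + 12 * Renum[1] + 3 * Renum[2] + Renum[3];
}

int main() {
  std::printf("%u\n", permuteEncode4({6, 2, 4, 5}));   // prints 320
  return 0;
}
```

The real function additionally maps LLVM register numbers to the compact-unwind numbering first and handles one through six saved registers; only the renumber-and-pack step is shown here.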
+ bool ExpectEnd = false; + for (MachineBasicBlock::iterator + MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opc = MI.getOpcode(); + if (Opc == X86::PROLOG_LABEL) continue; + if (!MI.getFlag(MachineInstr::FrameSetup)) break; + + // We don't exect any more prolog instructions. + if (ExpectEnd) return 0; + + if (Opc == PushInstr) { + // If there are too many saved registers, we cannot use compact encoding. + if (--SavedRegIdx < 0) return 0; + + SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg(); + CFAOffset += OffsetSize; + InstrOffset += PushInstrSize; + } else if (Opc == MoveInstr) { + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + + if (DstReg != FramePtr || SrcReg != StackPtr) + return 0; + + CFAOffset = 0; + memset(SavedRegs, 0, sizeof(SavedRegs)); + InstrOffset += MoveInstrSize; + } else if (Opc == SubtractInstr) { + if (StackAdjust) + // We all ready have a stack pointer adjustment. + return 0; + + if (!MI.getOperand(0).isReg() || + MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || + MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm()) + // We need this to be a stack adjustment pointer. Something like: + // + // %RSP<def> = SUB64ri8 %RSP, 48 + return 0; + + StackAdjust = MI.getOperand(2).getImm() / StackDivide; + SubtractInstrIdx += InstrOffset; + ExpectEnd = true; + } + } + + // Encode that we are using EBP/RBP as the frame pointer. + uint32_t CompactUnwindEncoding = 0; + CFAOffset /= StackDivide; + if (HasFP) { + if ((CFAOffset & 0xFF) != CFAOffset) + // Offset was too big for compact encoding. + return 0; + + // Get the encoding of the saved registers when we have a frame pointer. + uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); + if (RegEnc == ~0U) + return 0; + + CompactUnwindEncoding |= 0x01000000; + CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16; + CompactUnwindEncoding |= RegEnc & 0x7FFF; + } else { + unsigned FullOffset = CFAOffset + StackAdjust; + if ((FullOffset & 0xFF) == FullOffset) { + // Frameless stack. + CompactUnwindEncoding |= 0x02000000; + CompactUnwindEncoding |= (FullOffset & 0xFF) << 16; + } else { + if ((CFAOffset & 0x7) != CFAOffset) + // The extra stack adjustments are too big for us to handle. + return 0; + + // Frameless stack with an offset too large for us to encode compactly. + CompactUnwindEncoding |= 0x03000000; + + // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' + // instruction. + CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; + + // Encode any extra stack stack changes (done via push instructions). + CompactUnwindEncoding |= (CFAOffset & 0x7) << 13; + } + + // Get the encoding of the saved registers when we don't have a frame + // pointer. + uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, + 6 - SavedRegIdx, + Is64Bit); + if (RegEnc == ~0U) return 0; + CompactUnwindEncoding |= RegEnc & 0x3FF; + } + + return CompactUnwindEncoding; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. 
Also emit labels used by the exception handler to @@ -370,7 +612,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -398,7 +639,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. - !IsWin64) { // Win64 has no Red Zone + !IsWin64 && // Win64 has no Red Zone + !EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -459,7 +701,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -478,7 +721,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); } - // Update EBP with the new base value... + // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) @@ -487,7 +730,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -504,8 +748,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (RegInfo->needsStackRealignment(MF)) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), - StackPtr).addReg(StackPtr).addImm(-MaxAlign); + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) + .addReg(StackPtr) + .addImm(-MaxAlign) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -522,6 +768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; + MBBI->setFlag(MachineInstr::FrameSetup); ++MBBI; if (!HasFP && needsFrameMoves) { @@ -530,8 +777,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. - unsigned Ptr = StackSize ? - MachineLocation::VirtualFP : StackPtr; + unsigned Ptr = StackSize ? 
MachineLocation::VirtualFP : StackPtr; MachineLocation SPDst(Ptr); MachineLocation SPSrc(Ptr, StackOffset); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); @@ -586,26 +832,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); + .addReg(X86::EAX, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); } if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(NumBytes); + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) .addExternalSymbol(StackProbeSymbol) .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. @@ -618,6 +868,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), StackPtr, false, NumBytes - 4); + MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } } else if (NumBytes) @@ -627,7 +878,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -647,6 +899,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (PushedRegs) emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); } + + // Darwin 10.7 and greater has support for compact unwind encoding. + if (STI.getTargetTriple().isMacOSX() && + !STI.getTargetTriple().isMacOSXVersionLT(10, 7)) + MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF)); } void X86FrameLowering::emitEpilogue(MachineFunction &MF, @@ -844,23 +1101,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -void -X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Calculate amount of bytes used for return address storing - int stackGrowth = (STI.is64Bit() ? -8 : -4); - const X86RegisterInfo *RI = TM.getRegisterInfo(); - - // Initial state of the frame pointer is esp+stackGrowth. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(RI->getStackRegister(), stackGrowth); - Moves.push_back(MachineMove(0, Dst, Src)); - - // Add return address to move list - MachineLocation CSDst(RI->getStackRegister(), stackGrowth); - MachineLocation CSSrc(RI->getRARegister()); - Moves.push_back(MachineMove(0, CSDst, CSSrc)); -} - int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const X86RegisterInfo *RI = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); @@ -873,9 +1113,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con // Skip the saved EBP. Offset += RI->getSlotSize(); } else { - unsigned Align = MFI->getObjectAlignment(FI); - assert((-(Offset + StackSize)) % Align == 0); - Align = 0; + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } // FIXME: Support tail calls @@ -1027,184 +1265,183 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); - FrameIdx = 0; + (void)FrameIdx; } } -/// permuteEncode - Create the permutation encoding used with frameless -/// stacks. It is passed the number of registers to be saved and an array of the -/// registers saved. -static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) { - // The saved registers are numbered from 1 to 6. In order to encode the order - // in which they were saved, we re-number them according to their place in the - // register order. The re-numbering is relative to the last re-numbered - // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: - // - // Orig Re-Num - // ---- ------ - // 6 6 - // 2 2 - // 4 3 - // 5 3 - // - bool Used[7] = { false, false, false, false, false, false, false }; - uint32_t RenumRegs[6]; - for (unsigned I = 0; I < SavedCount; ++I) { - uint32_t Renum = 0; - for (unsigned U = 1; U < 7; ++U) { - if (U == Registers[I]) - break; - if (!Used[U]) - ++Renum; - } - - Used[Registers[I]] = true; - RenumRegs[I] = Renum; +static bool +HasNestArgument(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; I++) { + if (I->hasNestAttr()) + return true; } + return false; +} - // Take the renumbered values and encode them into a 10-bit number. 
- uint32_t permutationEncoding = 0; - switch (SavedCount) { - case 6: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 5: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 4: - permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] - + 3 * RenumRegs[2] + RenumRegs[3]; - break; - case 3: - permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] - + RenumRegs[2]; - break; - case 2: - permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; - break; - case 1: - permutationEncoding |= RenumRegs[0]; - break; +static unsigned +GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { + if (Is64Bit) { + return X86::R11; + } else { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + bool IsNested = HasNestArgument(&MF); + + if (CallingConvention == CallingConv::X86_FastCall) { + if (IsNested) { + report_fatal_error("Segmented stacks does not support fastcall with " + "nested function."); + return -1; + } else { + return X86::EAX; + } + } else { + if (IsNested) + return X86::EDX; + else + return X86::ECX; + } } - - return permutationEncoding; } -uint32_t X86FrameLowering:: -getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, - int DataAlignmentFactor, bool IsEH) const { - uint32_t Encoding = 0; - int CFAOffset = 0; - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; - unsigned SavedRegIdx = 0; - int FramePointerReg = -1; +void +X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { + MachineBasicBlock &prologueMBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + uint64_t StackSize; + bool Is64Bit = STI.is64Bit(); + unsigned TlsReg, TlsOffset; + DebugLoc DL; + const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); - for (ArrayRef<MCCFIInstruction>::const_iterator - I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { - const MCCFIInstruction &Inst = *I; - MCSymbol *Label = Inst.getLabel(); + unsigned ScratchReg = GetScratchRegister(Is64Bit, MF); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "Scratch register is live-in"); - // Ignore invalid labels. - if (Label && !Label->isDefined()) continue; + if (MF.getFunction()->isVarArg()) + report_fatal_error("Segmented stacks do not support vararg functions."); + if (!ST->isTargetLinux()) + report_fatal_error("Segmented stacks supported only on linux."); - unsigned Operation = Inst.getOperation(); - if (Operation != MCCFIInstruction::Move && - Operation != MCCFIInstruction::RelMove) - // FIXME: We can't handle this frame just yet. - return 0; - - const MachineLocation &Dst = Inst.getDestination(); - const MachineLocation &Src = Inst.getSource(); - const bool IsRelative = (Operation == MCCFIInstruction::RelMove); - - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() != MachineLocation::VirtualFP) { - // DW_CFA_def_cfa - assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); - if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. 
- return 0; - FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); - } // else DW_CFA_def_cfa_offset - - if (IsRelative) - CFAOffset += Src.getOffset(); - else - CFAOffset -= Src.getOffset(); + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + bool IsNested = false; - continue; - } + // We need to know if the function has a nest argument only in 64 bit mode. + if (Is64Bit) + IsNested = HasNestArgument(&MF); - if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - // DW_CFA_def_cfa_register - assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + // The MOV R10, RAX needs to be in a different block, since the RET we emit in + // allocMBB needs to be last (terminating) instruction. + MachineBasicBlock *restoreR10MBB = NULL; + if (IsNested) + restoreR10MBB = MF.CreateMachineBasicBlock(); - if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. - return 0; + for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), + e = prologueMBB.livein_end(); i != e; i++) { + allocMBB->addLiveIn(*i); + checkMBB->addLiveIn(*i); - FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); - if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) - return 0; + if (IsNested) + restoreR10MBB->addLiveIn(*i); + } - SavedRegs[0] = 0; - SavedRegIdx = 0; - continue; - } + if (IsNested) { + allocMBB->addLiveIn(X86::R10); + restoreR10MBB->addLiveIn(X86::RAX); + } - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - if (IsRelative) - Offset -= CFAOffset; - Offset /= DataAlignmentFactor; + if (IsNested) + MF.push_front(restoreR10MBB); + MF.push_front(allocMBB); + MF.push_front(checkMBB); + + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. + StackSize = MFI->getStackSize(); + + // Read the limit off the current stacklet off the stack_guard location. + if (Is64Bit) { + TlsReg = X86::FS; + TlsOffset = 0x70; + + BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + } else { + TlsReg = X86::GS; + TlsOffset = 0x30; - if (Offset < 0) { - // FIXME: Handle? - // DW_CFA_offset_extended_sf - return 0; - } else if (Reg < 64) { - // DW_CFA_offset + Reg - if (SavedRegIdx >= 6) return 0; - int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); - if (CURegNum == -1) return 0; - SavedRegs[SavedRegIdx++] = CURegNum; - } else { - // FIXME: Handle? - // DW_CFA_offset_extended - return 0; - } + BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } - // Bail if there are too many registers to encode. - if (SavedRegIdx > 6) return 0; + // This jump is taken if SP >= (Stacklet Limit + Stack Space required). + // It jumps to normal execution of the function body. 
+ BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB); + + // On 32 bit we first push the arguments size and then the frame size. On 64 + // bit, we pass the stack frame size in r10 and the argument size in r11. + if (Is64Bit) { + // Functions with nested arguments use R10, so it needs to be saved across + // the call to _morestack + + if (IsNested) + BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10); + + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10) + .addImm(StackSize); + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11) + .addImm(X86FI->getArgumentStackSize()); + MF.getRegInfo().setPhysRegUsed(X86::R10); + MF.getRegInfo().setPhysRegUsed(X86::R11); + } else { + // Since we'll call __morestack, stack alignment needs to be preserved. + BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(X86FI->getArgumentStackSize()); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(StackSize); + } - // Check if the offset is too big. - CFAOffset /= 4; - if ((CFAOffset & 0xFF) != CFAOffset) - return 0; - Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. - - if (FramePointerReg != -1) { - Encoding |= 0x01000000; // EBP/RBP Unwind Frame - for (unsigned I = 0; I != SavedRegIdx; ++I) { - unsigned Reg = SavedRegs[I]; - if (Reg == unsigned(FramePointerReg)) continue; - Encoding |= (Reg & 0x7) << (I * 3); // Register encoding - } + // __morestack is in libgcc + if (Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) + .addExternalSymbol("__morestack"); + else + BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("__morestack"); + + // __morestack only seems to remove 8 bytes off the stack. Add back the + // additional 8 bytes we added before pushing the arguments. 
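adjustForSegmentedStacks above emits a prologue that compares the would-be stack pointer against the stacklet limit kept in thread-local storage and calls __morestack when the frame does not fit. A conceptual C++ model of that check follows; stackletLimit and morestack are illustrative stand-ins only (on the real x86-64 Linux target the limit is read from %fs:0x70 and __morestack comes from libgcc):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins for the real TLS read and libgcc routine.
static std::uintptr_t stackletLimit() { return 0x1000; }
static void morestack(std::uint64_t FrameSize, std::uint64_t ArgSize) {
  std::printf("grow: frame=%llu args=%llu\n",
              (unsigned long long)FrameSize, (unsigned long long)ArgSize);
}

static void prologueCheck(std::uintptr_t SP, std::uint64_t FrameSize,
                          std::uint64_t ArgSize) {
  if (SP - FrameSize > stackletLimit())
    return;                          // enough room: fall through to the body
  morestack(FrameSize, ArgSize);     // allocate a new stacklet, then continue
}

int main() {
  prologueCheck(0x2000, 0x500, 16);  // fits in the current stacklet
  prologueCheck(0x1200, 0x500, 16);  // takes the __morestack path
  return 0;
}
```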
+ if (!Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::RET)); + + if (IsNested) + BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10) + .addReg(X86::RAX); + + if (IsNested) { + allocMBB->addSuccessor(restoreR10MBB); + restoreR10MBB->addSuccessor(&prologueMBB); } else { - Encoding |= 0x02000000; // Frameless unwind with small stack - Encoding |= (SavedRegIdx & 0x7) << 10; - Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + allocMBB->addSuccessor(&prologueMBB); } - return Encoding; + checkMBB->addSuccessor(allocMBB); + checkMBB->addSuccessor(&prologueMBB); + +#ifdef XDEBUG + MF.verify(); +#endif } diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h index 14c31ed..6f49064 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h @@ -41,6 +41,8 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void adjustForSegmentedStacks(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; @@ -57,11 +59,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - - uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, - int DataAlignmentFactor, bool IsEH) const; + uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2b0f283..02b0ff2 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -474,10 +474,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - // If the source and destination are SSE registers, then this is a legal - // conversion that should not be lowered. EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); + + // If any of the sources are vectors, no fp stack involved. + if (SrcVT.isVector() || DstVT.isVector()) + continue; + + // If the source and destination are SSE registers, then this is a legal + // conversion that should not be lowered. bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) @@ -2168,9 +2173,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, Reg); - // Emit a testb. No special NOREX tricks are needed since there's - // only one GPR operand! - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only + // target GR8_NOREX registers, so make sure the register class is + // forced. 
+ return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, Subreg, ShiftedImm); } diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 5096d9a..7c8ce17 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; using namespace dwarf; @@ -71,9 +72,6 @@ static SDValue Extract128BitVector(SDValue Vec, SelectionDAG &DAG, DebugLoc dl); -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG); - - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -85,14 +83,10 @@ static SDValue Extract128BitVector(SDValue Vec, DebugLoc dl) { EVT VT = Vec.getValueType(); assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); - EVT ElVT = VT.getVectorElementType(); - - int Factor = VT.getSizeInBits() / 128; - - EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), - ElVT, - VT.getVectorNumElements() / Factor); + int Factor = VT.getSizeInBits()/128; + EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, + VT.getVectorNumElements()/Factor); // Extract from UNDEF is UNDEF. if (Vec.getOpcode() == ISD::UNDEF) @@ -111,7 +105,6 @@ static SDValue Extract128BitVector(SDValue Vec, * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -136,21 +129,18 @@ static SDValue Insert128BitVector(SDValue Result, assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - EVT ResultVT = Result.getValueType(); // Insert the relevant 128 bits. - unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); + unsigned ElemsPerChunk = 128/ElVT.getSizeInBits(); // This is the index of the first element of the 128-bit chunk // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); return Result; @@ -159,34 +149,6 @@ static SDValue Insert128BitVector(SDValue Result, return SDValue(); } -/// Given two vectors, concat them. -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) { - DebugLoc dl = Lower.getDebugLoc(); - - assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!"); - - EVT VT = EVT::getVectorVT(*DAG.getContext(), - Lower.getValueType().getVectorElementType(), - Lower.getValueType().getVectorNumElements() * 2); - - // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors). - assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!"); - - // Insert the upper subvector. - SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper, - DAG.getConstant( - // This is half the length of the result - // vector. Start inserting the upper 128 - // bits here. - Lower.getValueType().getVectorNumElements(), - MVT::i32), - DAG, dl); - - // Insert the lower subvector. 
- Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl); - return Vec; -} - static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); bool is64Bit = Subtarget->is64Bit(); @@ -197,11 +159,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new TargetLoweringObjectFileMachO(); } - if (Subtarget->isTargetELF()) { - if (is64Bit) - return new X8664_ELFTargetObjectFile(TM); - return new X8632_ELFTargetObjectFile(TM); - } + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) return new TargetLoweringObjectFileCOFF(); llvm_unreachable("unknown subtarget type"); @@ -222,6 +181,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); + // X86-SSE is even stranger. It uses -1 or 0 for vector masks. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. @@ -354,7 +315,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); } else if (!UseSoftFloat) { - if (X86ScalarSSEf32 && !Subtarget->hasSSE3()) + // Since AVX is a superset of SSE3, only check for SSE here. + if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. @@ -417,15 +379,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ , MVT::i8 , Custom); - setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - setOperationAction(ISD::CTLZ , MVT::i32 , Custom); - if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTTZ , MVT::i64 , Custom); - setOperationAction(ISD::CTLZ , MVT::i64 , Custom); + if (Subtarget->hasBMI()) { + setOperationAction(ISD::CTTZ , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTTZ , MVT::i8 , Custom); + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTTZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + } + + if (Subtarget->hasLZCNT()) { + setOperationAction(ISD::CTLZ , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTLZ , MVT::i8 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } if (Subtarget->hasPOPCNT()) { @@ -491,8 +462,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasXMM()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - // We may not have a libcall for MEMBARRIER so we should lower this. 
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory @@ -506,9 +477,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) MVT VT = IntVTs[i]; setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } if (!Subtarget->is64Bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); @@ -518,6 +491,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); } + if (Subtarget->hasCmpxchg16b()) { + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); + } + // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && @@ -539,7 +516,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -556,11 +534,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, - (Subtarget->is64Bit() ? MVT::i64 : MVT::i32), - (Subtarget->isTargetCOFF() - && !Subtarget->isTargetEnvMacho() - ? Custom : Expand)); + + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Custom); + else if (EnableSegmentedStacks) + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Expand); if (!UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. 
@@ -739,7 +722,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); @@ -754,6 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand); for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) setTruncStoreAction((MVT::SimpleValueType)VT, @@ -816,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); + setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!UseSoftFloat && Subtarget->hasXMMInt()) { @@ -846,10 +830,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v2f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v16i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i32, Custom); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + setOperationAction(ISD::SETCC, MVT::v16i8, Custom); + setOperationAction(ISD::SETCC, MVT::v8i16, Custom); + setOperationAction(ISD::SETCC, MVT::v4i32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); @@ -925,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41()) { + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -944,6 +928,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); + setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); + setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. 
f32 vectors are // custom since the immediate controlling the insert encodes additional @@ -964,10 +954,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE2()) { + if (Subtarget->hasXMMInt()) { setOperationAction(ISD::SRL, MVT::v2i64, Custom); setOperationAction(ISD::SRL, MVT::v4i32, Custom); setOperationAction(ISD::SRL, MVT::v16i8, Custom); + setOperationAction(ISD::SRL, MVT::v8i16, Custom); setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); @@ -977,15 +968,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); + if (Subtarget->hasSSE42() || Subtarget->hasAVX()) + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { - addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); - addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); - addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); @@ -1005,6 +997,59 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); + + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + setOperationAction(ISD::SRL, MVT::v16i16, Custom); + setOperationAction(ISD::SRL, MVT::v32i8, Custom); + + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v16i16, Custom); + setOperationAction(ISD::SHL, MVT::v32i8, Custom); + + setOperationAction(ISD::SRA, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v16i16, Custom); + + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); + setOperationAction(ISD::SETCC, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v8i32, Custom); + setOperationAction(ISD::SETCC, MVT::v4i64, Custom); + + setOperationAction(ISD::SELECT, MVT::v4f64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i64, Custom); + setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + + setOperationAction(ISD::ADD, MVT::v4i64, 
Custom); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v16i16, Custom); + setOperationAction(ISD::ADD, MVT::v32i8, Custom); + + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v16i16, Custom); + setOperationAction(ISD::SUB, MVT::v32i8, Custom); + + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, Custom); + setOperationAction(ISD::MUL, MVT::v16i16, Custom); + // Don't lower v32i8 because there is no 128-bit byte mul + // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1093,6 +1138,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); @@ -1100,7 +1146,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::FADD); + setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SINT_TO_FP); @@ -1124,25 +1173,26 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } -MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i8; +EVT X86TargetLowering::getSetCCResultType(EVT VT) const { + if (!VT.isVector()) return MVT::i8; + return VT.changeVectorElementTypeToInteger(); } /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. -static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { +static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (MaxAlign == 16) return; - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { if (VTy->getBitWidth() == 128) MaxAlign = 16; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { unsigned EltAlign = 0; getMaxByValAlign(ATy->getElementType(), EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; - } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { unsigned EltAlign = 0; getMaxByValAlign(STy->getElementType(i), EltAlign); @@ -1159,7 +1209,7 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { /// function arguments in the caller parameter area. For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. -unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { +unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { if (Subtarget->is64Bit()) { // Max of 8 and alignment of type. 
unsigned TyAlign = TD->getABITypeAlignment(Ty); @@ -1203,9 +1253,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16))) && Subtarget->getStackAlignment() >= 16) { - if (Subtarget->hasSSE2()) + if (Subtarget->hasAVX() && + Subtarget->getStackAlignment() >= 32) + return MVT::v8f32; + if (Subtarget->hasXMMInt()) return MVT::v4i32; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) return MVT::v4f32; } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && @@ -1408,7 +1461,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. - if (!Subtarget->hasSSE2()) + if (!Subtarget->hasXMMInt()) ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } @@ -1700,6 +1753,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // places. assert(VA.getValNo() != LastVal && "Don't support value assigned to multiple locs yet"); + (void)LastVal; LastVal = VA.getValNo(); if (VA.isRegLoc()) { @@ -1917,6 +1971,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } + FuncInfo->setArgumentStackSize(StackSize); + return Chain; } @@ -2744,8 +2800,6 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -2754,10 +2808,17 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: return true; } return false; @@ -2783,6 +2844,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2796,6 +2861,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: + case X86ISD::VPERM2F128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2815,8 +2881,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -2825,6 +2889,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -3026,6 +3092,17 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } +/// isUndefOrInRange - Return true if every element in Mask, begining +/// from position Pos and ending in Pos+Size, falls within the specified +/// range (L, L+Pos]. or is undef. 
+static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low, int Hi) { + for (int i = Pos, e = Pos+Size; i != e; ++i) + if (!isUndefOrInRange(Mask[i], Low, Hi)) + return false; + return true; +} + /// isUndefOrEqual - Val is either less than zero (undef) or equal to the /// specified value. static bool isUndefOrEqual(int Val, int CmpVal) { @@ -3034,6 +3111,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) { return false; } +/// isSequentialOrUndefInRange - Return true if every element in Mask, begining +/// from position Pos and ending in Pos+Size, falls within the specified +/// sequential range (L, L+Pos]. or is undef. +static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low) { + for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (!isUndefOrEqual(Mask[i], Low)) + return false; + return true; +} + /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. @@ -3104,11 +3192,13 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PALIGNR. static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool hasSSSE3) { + bool hasSSSE3OrAVX) { int i, e = VT.getVectorNumElements(); + if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64) + return false; // Do not handle v2i64 / v2f64 shuffles with palignr. - if (e < 4 || !hasSSSE3) + if (e < 4 || !hasSSSE3OrAVX) return false; for (i = 0; i != e; ++i) @@ -3119,42 +3209,176 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, if (i == e) return false; - // Determine if it's ok to perform a palignr with only the LHS, since we - // don't have access to the actual shuffle elements to see if RHS is undef. - bool Unary = Mask[i] < (int)e; - bool NeedsUnary = false; + // Make sure we're shifting in the right direction. + if (Mask[i] <= i) + return false; int s = Mask[i] - i; // Check the rest of the elements to see if they are consecutive. for (++i; i != e; ++i) { int m = Mask[i]; - if (m < 0) - continue; + if (m >= 0 && m != s+i) + return false; + } + return true; +} + +/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to 256-bit +/// VSHUFPSY. +static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT, + const X86Subtarget *Subtarget) { + int NumElems = VT.getVectorNumElements(); + + if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) + return false; + + if (NumElems != 8) + return false; - Unary = Unary && (m < (int)e); - NeedsUnary = NeedsUnary || (m < s); + // VSHUFPSY divides the resulting vector into 4 chunks. + // The sources are also splitted into 4 chunks, and each destination + // chunk must come from a different source chunk. 
+ //
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
- Unary = Unary && (m < (int)e);
- NeedsUnary = NeedsUnary || (m < s);
+ // The mask of the second half must be the same as the first but with
+ // the appropriate offsets. This works in the same way as VPERMILPS
+ // works with masks.
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
- if (NeedsUnary && !Unary)
+ }
+ for (int i = QuarterSize*3; i < NumElems; ++i) {
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
return false;
- if (Unary && m != ((s+i) & (e-1)))
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
- if (!Unary && m != (s+i))
+ }
+ return true;
}
-bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPALIGNRMask(M, N->getValueType(0), true);
+/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
+static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
+ "Only supports v8i32 and v8f32 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ // The mask of the first half must be equal to the second one.
+ unsigned Shamt = (i%HalfSize)*2;
+ unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << Shamt;
+ }
+
+ return Mask;
+}
+
+/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
+/// version and the mask of the second half isn't bound to the first
+/// one.
+static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ // VSHUFPDY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ // + // SRC1 => X3 X2 X1 X0 + // SRC2 => Y3 Y2 Y1 Y0 + // + // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0 + // + int QuarterSize = NumElems/4; + int HalfSize = QuarterSize*2; + for (int i = 0; i < QuarterSize; ++i) + if (!isUndefOrInRange(Mask[i], 0, HalfSize)) + return false; + for (int i = QuarterSize; i < QuarterSize*2; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) + return false; + for (int i = QuarterSize*2; i < QuarterSize*3; ++i) + if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) + return false; + for (int i = QuarterSize*3; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) + return false; + + return true; +} + +/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VSHUFPDY instruction. +static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + assert(NumElems == 4 && VT.getSizeInBits() == 256 && + "Only supports v4i64 and v4f64 types"); + + int HalfSize = NumElems/2; + unsigned Mask = 0; + for (int i = 0; i != NumElems ; ++i) { + if (SVOp->getMaskElt(i) < 0) + continue; + int Elt = SVOp->getMaskElt(i) % HalfSize; + Mask |= Elt << i; + } + + return Mask; } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to SHUFP*. +/// specifies a shuffle of elements that is suitable for input to 128-bit +/// SHUFPS and SHUFPD. static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; + if (NumElems != 2 && NumElems != 4) return false; @@ -3204,7 +3428,13 @@ static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; + + if (NumElems != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 @@ -3218,15 +3448,19 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, /// <2, 3, 2, 3> bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; if (NumElems != 4) return false; return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand @@ -3273,20 +3507,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; unsigned Start = 0; - unsigned End = NumSectionElts; - for (unsigned s = 0; s < NumSections; ++s) { - for (unsigned i = Start, j = s * NumSectionElts; + unsigned End = NumLaneElts; + for (unsigned s = 0; s < NumLanes; ++s) { + for (unsigned i = Start, j = s * NumLaneElts; i != End; i += 2, ++j) { int BitI = Mask[i]; @@ -3302,8 +3538,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, } } // Process the next 128 bits. - Start += NumSectionElts; - End += NumSectionElts; + Start += NumLaneElts; + End += NumLaneElts; } return true; @@ -3320,21 +3556,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j + NumElts/2)) - return false; - if (V2IsSplat) { - if (isUndefOrEqual(BitI1, NumElts)) - return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. 
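+ // For example (an illustrative case, assuming NumElts == 8): for v8i32,
+ // with two 128-bit lanes of four elements each, the expected unpckh
+ // pattern is <2, 10, 3, 11, 6, 14, 7, 15>, i.e. within each lane the
+ // high half of that lane of V1 is interleaved with the corresponding
+ // lane of V2.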
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != End; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
return true;
}
@@ -3353,16 +3606,21 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElems / NumSections;
+ // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+ // FIXME: Need a better way to get rid of this, there's no latency difference
+ // between UNPCKLPD and MOVDDUP, the latter should always be checked first and
+ // the former later. We should also remove the "_undef" special mask.
+ if (NumElems == 4 && VT.getSizeInBits() == 256)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElems / NumLanes;
- for (unsigned s = 0; s < NumSections; ++s) {
- for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
- i != NumSectionElts * (s + 1);
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
+ i != NumLaneElts * (s + 1);
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3433,6 +3691,189 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: the
+/// shuffle below:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ int HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
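+/// For example (illustrative), the v8i32 mask <4, 5, 6, 7, 12, 13, 14, 15>
+/// takes its low half from half 1 (the upper half of V1) and its high half
+/// from half 3 (the upper half of V2), giving an immediate of 1 | (3 << 4),
+/// i.e. 0x31.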
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int HalfSize = VT.getVectorNumElements()/2;
+
+ int FstHalf = 0, SndHalf = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (int i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
+
+/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching is different depending on whether the underlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 64-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 4)
+ return false;
+
+ // The mask on the high lane is independent of the low. Both can match
+ // any element inside its own lane, but can't cross.
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int LaneStart = l*LaneSize;
+ if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ return false;
+ }
+
+ return true;
+}
+
+/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
+/// Note that VPERMIL mask matching is different depending on whether the underlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 32-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 8)
+ return false;
+
+ // The mask on the high lane should be the same as the low. Actually,
+ // they can differ if either of the corresponding indices in a lane is undef
+ // and the other stays in range.
+ int LaneSize = NumElts/NumLanes;
+ for (int i = 0; i < LaneSize; ++i) {
+ int HighElt = i+LaneSize;
+ bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
+ bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
+
+ if (!HighValid || !LowValid)
+ return false;
+ if (Mask[i] < 0 || Mask[HighElt] < 0)
+ continue;
+ if (Mask[HighElt]-Mask[i] != LaneSize)
+ return false;
+ }
+
+ return true;
+}
+
+/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
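+/// For example (illustrative), the v8f32 mask <3, 1, 0, 2, 7, 5, 4, 6> (the
+/// same in-lane pattern repeated in both lanes) encodes two bits per element
+/// position and yields 3 | (1 << 2) | (0 << 4) | (2 << 6) = 0x87.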
+static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int NumElts = VT.getVectorNumElements(); + int NumLanes = VT.getSizeInBits()/128; + int LaneSize = NumElts/NumLanes; + + // Although the mask is equal for both lanes do it twice to get the cases + // where a mask will match because the same mask element is undef on the + // first half but valid on the second. This would get pathological cases + // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid. + unsigned Mask = 0; + for (int l = 0; l < NumLanes; ++l) { + for (int i = 0; i < LaneSize; ++i) { + int MaskElt = SVOp->getMaskElt(i+(l*LaneSize)); + if (MaskElt < 0) + continue; + if (MaskElt >= LaneSize) + MaskElt -= LaneSize; + Mask |= MaskElt << (i*2); + } + } + + return Mask; +} + +/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERMILPD* instructions. +static unsigned getShuffleVPERMILPDImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int NumElts = VT.getVectorNumElements(); + int NumLanes = VT.getSizeInBits()/128; + + unsigned Mask = 0; + int LaneSize = NumElts/NumLanes; + for (int l = 0; l < NumLanes; ++l) + for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { + int MaskElt = SVOp->getMaskElt(i); + if (MaskElt < 0) + continue; + Mask |= (MaskElt-l*LaneSize) << i; + } + + return Mask; +} + /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. @@ -3463,58 +3904,92 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) return false; - // Expect 1, 1, 3, 3 - for (unsigned i = 0; i < 2; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 1) - return false; - } + // The second vector must be undef + if (N->getOperand(1).getOpcode() != ISD::UNDEF) + return false; - bool HasHi = false; - for (unsigned i = 2; i < 4; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 3) + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if ((VT.getSizeInBits() == 128 && NumElems != 4) || + (VT.getSizeInBits() == 256 && NumElems != 8)) + return false; + + // "i+1" is the value the indexed mask element must have + for (unsigned i = 0; i < NumElems; i += 2) + if (!isUndefOrEqual(N->getMaskElt(i), i+1) || + !isUndefOrEqual(N->getMaskElt(i+1), i+1)) return false; - if (Elt == 3) - HasHi = true; - } - // Don't use movshdup if it can be done with a shufps. - // FIXME: verify that matching u, u, 3, 3 is what we want. - return HasHi; + + return true; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + return false; + + // The second vector must be undef + if (N->getOperand(1).getOpcode() != ISD::UNDEF) + return false; + + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if ((VT.getSizeInBits() == 128 && NumElems != 4) || + (VT.getSizeInBits() == 256 && NumElems != 8)) return false; - // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) - if (N->getMaskElt(i) > 0) + // "i" is the value the indexed mask element must have + for (unsigned i = 0; i < NumElems; i += 2) + if (!isUndefOrEqual(N->getMaskElt(i), i) || + !isUndefOrEqual(N->getMaskElt(i+1), i)) return false; - bool HasHi = false; - for (unsigned i = 2; i < 4; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 2) + return true; +} + +/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to 256-bit +/// version of MOVDDUP. +static bool isMOVDDUPYMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + int NumElts = VT.getVectorNumElements(); + bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF; + + if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 || + !V2IsUndef || NumElts != 4) + return false; + + for (int i = 0; i != NumElts/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), 0)) return false; - if (Elt == 2) - HasHi = true; - } - // Don't use movsldup if it can be done with a shufps. - return HasHi; + for (int i = NumElts/2; i != NumElts; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2)) + return false; + return true; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVDDUP. +/// specifies a shuffle of elements that is suitable for input to 128-bit +/// version of MOVDDUP. bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { - int e = N->getValueType(0).getVectorNumElements() / 2; + EVT VT = N->getValueType(0); + + if (VT.getSizeInBits() != 128) + return false; + int e = VT.getVectorNumElements() / 2; for (int i = 0; i < e; ++i) if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; @@ -3627,6 +4102,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { if (Val >= 0) break; } + assert(Val - i > 0 && "PALIGNR imm should be positive"); return (Val - i) * EltSize; } @@ -3644,7 +4120,6 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { EVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); - return Index / NumElemsPerChunk; } @@ -3662,7 +4137,6 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { EVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); - return Index / NumElemsPerChunk; } @@ -3716,7 +4190,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). 
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { - if (Op->getValueType(0).getVectorNumElements() != 4) + EVT VT = Op->getValueType(0); + if (VT.getSizeInBits() != 128) + return false; + if (VT.getVectorNumElements() != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) if (!isUndefOrEqual(Op->getMaskElt(i), i+2)) @@ -3748,6 +4225,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ShuffleVectorSDNode *Op) { + EVT VT = Op->getValueType(0); + if (VT.getSizeInBits() != 128) + return false; + if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -3755,7 +4236,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Op->getValueType(0).getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4) return false; @@ -3811,7 +4292,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, +static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -3819,7 +4300,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, // to their dest type. This ensures they get CSE'd. SDValue Vec; if (VT.getSizeInBits() == 128) { // SSE - if (HasSSE2) { // SSE2 + if (HasXMMInt) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 @@ -3838,21 +4319,25 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to -/// their original type, ensuring they get CSE'd. +/// Always build ones vectors as <4 x i32>. For 256-bit types, use two +/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their +/// original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected a 128-bit or 256-bit vector type"); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Cst, Cst, Cst, Cst); - SDValue Vec; if (VT.is256BitVector()) { - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); - } else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), + Vec, DAG.getConstant(0, MVT::i32), DAG, dl); + Vec = Insert128BitVector(InsV, Vec, + DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + } + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -3902,7 +4387,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +/// getUnpackh - Returns a vector_shuffle node for an unpackh operation. 
static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -3915,31 +4400,95 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- EVT PVT = MVT::v4f32;
- EVT VT = SV->getValueType(0);
- DebugLoc dl = SV->getDebugLoc();
- SDValue V1 = SV->getOperand(0);
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target supported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+ EVT VT = V.getValueType();
int NumElems = VT.getVectorNumElements();
- int EltNo = SV->getSplatIndex();
+ DebugLoc dl = V.getDebugLoc();
- // unpack elements to the correct location
while (NumElems > 4) {
if (EltNo < NumElems/2) {
- V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ V = getUnpackl(DAG, dl, VT, V, V);
} else {
- V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ V = getUnpackh(DAG, dl, VT, V, V);
EltNo -= NumElems/2;
}
NumElems >>= 1;
}
+ return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+ EVT VT = V.getValueType();
+ DebugLoc dl = V.getDebugLoc();
+ assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+ && "Vector size not supported");
+
+ if (VT.getSizeInBits() == 128) {
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+ &SplatMask[0]);
+ } else {
+ // To use VPERMILPS to splat scalars, the second half of indices must
+ // refer to the higher part, which is a duplication of the lower one,
+ // because VPERMILPS can only handle in-lane permutations.
+ int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+ EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+ V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+ &SplatMask[0]);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+
+ int EltNo = SV->getSplatIndex();
+ int NumElems = SrcVT.getVectorNumElements();
+ unsigned Size = SrcVT.getSizeInBits();
+
+ assert(((Size == 128 && NumElems > 4) || Size == 256) &&
+ "Unknown how to promote splat for type");
+
+ // Extract the 128-bit part containing the splat element and update
+ // the splat element index when it refers to the higher register.
+ if (Size == 256) {
+ unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
+ if (Idx > 0)
+ EltNo -= NumElems/2;
+ }
+
+ // All i16 and i8 vector types can't be used directly by a generic shuffle
+ // instruction because the target has no such instruction. Generate shuffles
Generate shuffles + // which repeat i16 and i8 several times until they fit in i32, and then can + // be manipulated by target suported shuffles. + EVT EltVT = SrcVT.getVectorElementType(); + if (EltVT == MVT::i8 || EltVT == MVT::i16) + V1 = PromoteSplati8i16(V1, DAG, EltNo); + + // Recreate the 256-bit vector and place the same 128-bit vector + // into the low and high part. This is necessary because we want + // to use VPERM* to shuffle the vectors + if (Size == 256) { + SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1, + DAG.getConstant(0, MVT::i32), DAG, dl); + V1 = Insert128BitVector(InsV, V1, + DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + } - // Perform the splat. - int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; - V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1); - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, V1); + return getLegalSplat(DAG, V1, EltNo); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3947,11 +4496,11 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { /// element of V2 is swizzled into the zero/undef vector, landing at element /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, - bool isZero, bool HasSSE2, - SelectionDAG &DAG) { + bool isZero, bool HasXMMInt, + SelectionDAG &DAG) { EVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 16> MaskVec; for (unsigned i = 0; i != NumElems; ++i) @@ -4005,6 +4554,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: DecodeUNPCKHPMask(NumElems, ShuffleMask); break; case X86ISD::PUNPCKLBW: @@ -4015,8 +4566,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: DecodeUNPCKLPMask(VT, ShuffleMask); @@ -4052,8 +4601,41 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, Depth+1); } + case X86ISD::VPERMILPS: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::VPERMILPSY: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::VPERMILPD: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::VPERMILPDY: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::VPERM2F128: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::MOVDDUP: + case X86ISD::MOVLHPD: + case X86ISD::MOVLPD: + case X86ISD::MOVLPS: + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + case X86ISD::PALIGN: + return SDValue(); 
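getLegalSplat above lowers a splat to a v4f32/v8f32 shuffle whose mask simply repeats the splat index, with the upper half using EltNo+4 because VPERMILPS only permutes within each 128-bit lane (the low lane having already been duplicated into the high one). A small sketch of just the mask construction and its effect, with arrays standing in for vectors:

#include <cassert>
#include <vector>

// Build the in-lane splat mask: {E,E,E,E} for 128-bit vectors and
// {E,E,E,E, E+4,E+4,E+4,E+4} for 256-bit ones.
static std::vector<int> inLaneSplatMask(unsigned NumElems, int EltNo) {
  assert((NumElems == 4 || NumElems == 8) && "f32-sized lanes only here");
  std::vector<int> Mask(NumElems);
  for (unsigned i = 0; i != NumElems; ++i)
    Mask[i] = EltNo + (i < 4 ? 0 : 4);  // upper half reads the upper lane copy
  return Mask;
}

// Effect of the shuffle once both lanes hold the same 128-bit value.
static std::vector<float> applySplat(const std::vector<float> &Src, int EltNo) {
  std::vector<int> Mask = inLaneSplatMask(Src.size(), EltNo);
  std::vector<float> Out(Src.size());
  for (std::size_t i = 0; i != Src.size(); ++i)
    Out[i] = Src[Mask[i]];
  return Out;
}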
// Not yet implemented. default: - assert("not implemented for target shuffle node"); + assert(0 && "unknown target shuffle node"); return SDValue(); } @@ -4205,6 +4787,11 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, /// logical left or right shift of a vector. static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + // Although the logic below support any bitwidth size, there are no + // shift instructions which handle more than 128-bit vectors. + if (SVOp->getValueType(0).getSizeInBits() > 128) + return false; + if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt)) return true; @@ -4295,6 +4882,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { + assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); @@ -4333,42 +4921,52 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, return SDValue(); } + // FIXME: 256-bit vector instructions don't require a strict alignment, + // improve this code to support it better. + unsigned RequiredAlign = VT.getSizeInBits()/8; SDValue Chain = LD->getChain(); - // Make sure the stack object alignment is at least 16. + // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - if (DAG.InferPtrAlignment(Ptr) < 16) { + if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) { if (MFI->isFixedObjectIndex(FI)) { // Can't change the alignment. FIXME: It's possible to compute // the exact stack offset and reference FI + adjust offset instead. // If someone *really* cares about this. That's the way to implement it. return SDValue(); } else { - MFI->setObjectAlignment(FI, 16); + MFI->setObjectAlignment(FI, RequiredAlign); } } - // (Offset % 16) must be multiple of 4. Then address is then + // (Offset % 16 or 32) must be multiple of 4. Then address is then // Ptr + (Offset & ~15). if (Offset < 0) return SDValue(); - if ((Offset % 16) & 3) + if ((Offset % RequiredAlign) & 3) return SDValue(); - int64_t StartOffset = Offset & ~15; + int64_t StartOffset = Offset & ~(RequiredAlign-1); if (StartOffset) Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); int EltNo = (Offset - StartOffset) >> 2; - int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; - EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr, + int NumElems = VT.getVectorNumElements(); + + EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); + SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(StartOffset), false, false, 0); - // Canonicalize it to a v4i32 shuffle. - V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getVectorShuffle(MVT::v4i32, dl, V1, - DAG.getUNDEF(MVT::v4i32),&Mask[0])); + + // Canonicalize it to a v4i32 or v8i32 shuffle. 
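LowerAsSplatVectorLoad above widens the required stack-object alignment to the full vector size and then derives both the aligned load offset and the element to splat from the scalar's byte offset. The arithmetic is compact enough to model directly; a standalone sketch with illustrative names:

#include <cstdint>

struct SplatLoadPlan {
  bool Feasible;        // false -> the lowering bails out (SDValue())
  int64_t StartOffset;  // byte offset of the aligned wide load
  int EltNo;            // 32-bit element within the loaded vector to splat
};

// VectorBits is 128 or 256; Offset is the byte offset of the scalar relative
// to the (re)aligned stack object, as in the hunk above.
static SplatLoadPlan planSplatLoad(unsigned VectorBits, int64_t Offset) {
  int64_t RequiredAlign = VectorBits / 8;        // 16 or 32 bytes
  if (Offset < 0 || ((Offset % RequiredAlign) & 3))
    return {false, 0, 0};                        // element not 4-byte aligned
  int64_t StartOffset = Offset & ~(RequiredAlign - 1);
  int EltNo = static_cast<int>((Offset - StartOffset) >> 2);
  return {true, StartOffset, EltNo};
}

// Example: a float at byte offset 36 in a 32-byte-aligned v8f32 slot gives
// StartOffset 32 and splats element 1 of the reloaded vector.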
+ SmallVector<int, 8> Mask; + for (int i = 0; i < NumElems; ++i) + Mask.push_back(EltNo); + + V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1); + return DAG.getNode(ISD::BITCAST, dl, NVT, + DAG.getVectorShuffle(CanonVT, dl, V1, + DAG.getUNDEF(CanonVT),&Mask[0])); } return SDValue(); @@ -4428,12 +5026,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), LDBase->getAlignment()); - } else if (NumElems == 4 && LastLoadedElt == 1) { + } else if (NumElems == 4 && LastLoadedElt == 1 && + DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, - Ops, 2, MVT::i32, - LDBase->getMemOperand()); + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64, + LDBase->getPointerInfo(), + LDBase->getAlignment(), + false/*isVolatile*/, true/*ReadMem*/, + false/*WriteMem*/); return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); @@ -4445,47 +5047,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT ExtVT = VT.getVectorElementType(); - unsigned NumElems = Op.getNumOperands(); - // For AVX-length vectors, build the individual 128-bit pieces and - // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !ISD::isBuildVectorAllZeros(Op.getNode())) { - SmallVector<SDValue, 8> V; - V.resize(NumElems); - for (unsigned i = 0; i < NumElems; ++i) { - V[i] = Op.getOperand(i); - } - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); - - // Build the lower subvector. - SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); - // Build the upper subvector. - SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], - NumElems/2); + // Vectors containing all zeros can be matched by pxor and xorps later + if (ISD::isBuildVectorAllZeros(Op.getNode())) { + // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd + // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. + if (Op.getValueType() == MVT::v4i32 || + Op.getValueType() == MVT::v8i32) + return Op; - return ConcatVectors(Lower, Upper, DAG); + return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl); } - // All zero's: - // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) - // All one's: - // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX) - if (ISD::isBuildVectorAllZeros(Op.getNode()) || - ISD::isBuildVectorAllOnes(Op.getNode())) { - // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to - // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are - // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || - Op.getValueType() == MVT::v8i32) + // Vectors containing all ones can be matched by pcmpeqd on 128-bit width + // vectors or broken into v4i32 operations on 256-bit vectors. 
+ if (ISD::isBuildVectorAllOnes(Op.getNode())) { + if (Op.getValueType() == MVT::v4i32) return Op; - if (ISD::isBuildVectorAllOnes(Op.getNode())) - return getOnesVector(Op.getValueType(), DAG, dl); - return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); + return getOnesVector(Op.getValueType(), DAG, dl); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4538,7 +5119,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. @@ -4566,7 +5147,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { (ExtVT == MVT::i64 && Subtarget->is64Bit())) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. - return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), + return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(), DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); @@ -4574,7 +5155,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT MiddleVT = MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -4603,7 +5184,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) MaskVec.push_back(i == Idx ? 0 : 1); @@ -4631,6 +5212,27 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (IsAllConstants) return SDValue(); + // For AVX-length vectors, build the individual 128-bit pieces and use + // shuffles to put them in place. + if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) { + SmallVector<SDValue, 32> V; + for (unsigned i = 0; i < NumElems; ++i) + V.push_back(Op.getOperand(i)); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); + + // Build both the lower and upper subvector. + SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); + SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], + NumElems/2); + + // Recreate the wider vector with the lower and upper part. + SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + // Let legalizer expand 2-wide build_vectors. 
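The new 256-bit BUILD_VECTOR path above constructs each 128-bit half from its slice of the operand list and then reassembles the wide value with two Insert128BitVector calls. A standalone model of that halving, with scalars in an array taking the place of the SDValue operands:

#include <cstdint>
#include <vector>

// Split the operand list of a wide build_vector into two halves, "build"
// each half, and concatenate: the lower half fills elements [0, N/2),
// the upper half fills [N/2, N).
static std::vector<uint32_t>
buildWideVector(const std::vector<uint32_t> &Ops) {
  const std::size_t NumElems = Ops.size();
  std::vector<uint32_t> Lower(Ops.begin(), Ops.begin() + NumElems / 2);
  std::vector<uint32_t> Upper(Ops.begin() + NumElems / 2, Ops.end());

  std::vector<uint32_t> Wide(NumElems);
  for (std::size_t i = 0; i != NumElems / 2; ++i) {
    Wide[i] = Lower[i];                  // Insert128BitVector at index 0
    Wide[i + NumElems / 2] = Upper[i];   // Insert128BitVector at NumElems/2
  }
  return Wide;
}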
if (EVTBits == 64) { if (NumNonZero == 1) { @@ -4639,7 +5241,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); } return SDValue(); } @@ -4664,7 +5266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { for (unsigned i = 0; i < 4; ++i) { bool isZero = !(NonZeros & (1 << i)); if (isZero) - V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl); else V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); } @@ -4708,7 +5310,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return LD; // For SSE 4.1, use insertps to put the high elements into the low element. - if (getSubtarget()->hasSSE41()) { + if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -4758,13 +5360,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - // We support concatenate two MMX registers and place them in a MMX - // register. This is better than doing a stack convert. +// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place +// them in a MMX register. This is better than doing a stack convert. +static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); + assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || ResVT == MVT::v8i16 || ResVT == MVT::v16i8); int Mask[2]; @@ -4785,6 +5386,42 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); } +// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction +// to create 256-bit vectors from two other 128-bit ones. +static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + EVT ResVT = Op.getValueType(); + + assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + unsigned NumElems = ResVT.getVectorNumElements(); + + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); +} + +SDValue +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { + EVT ResVT = Op.getValueType(); + + assert(Op.getNumOperands() == 2); + assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && + "Unsupported CONCAT_VECTORS for value type"); + + // We support concatenate two MMX registers and place them in a MMX register. + // This is better than doing a stack convert. + if (ResVT.is128BitVector()) + return LowerMMXCONCAT_VECTORS(Op, DAG); + + // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors + // from two other 128-bit ones. + return LowerAVXCONCAT_VECTORS(Op, DAG); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. 
[ssse3] 1 x pshufb @@ -4844,7 +5481,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // quads, disable the next transformation since it does not help SSSE3. bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (Subtarget->hasSSSE3()) { + if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -4917,7 +5554,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. - if (Subtarget->hasSSSE3()) { + if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -4985,7 +5622,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && + (Subtarget->hasSSSE3() || Subtarget->hasAVX())) NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFLWImmediate(NewV.getNode()), @@ -5013,7 +5651,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && + (Subtarget->hasSSSE3() || Subtarget->hasAVX())) NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFHWImmediate(NewV.getNode()), @@ -5079,7 +5718,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. - if (TLI.getSubtarget()->hasSSSE3()) { + if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) { SmallVector<SDValue,16> pshufbMask; // If all result elements are from one input vector, then only translate @@ -5276,15 +5915,109 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, OpVT, SrcOp))); } -/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of -/// shuffles. +/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector +/// shuffle node referes to only one lane in the sources. 
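Both the v8i16 and v16i8 paths above fall back to a single PSHUFB per input once SSSE3 or AVX is available. As a reference for the byte masks built in those hunks, here is a scalar model of PSHUFB's documented behaviour (this reflects the instruction definition, not code in the patch): a set high bit in a mask byte zeroes the destination byte, otherwise the low four bits index the source.

#include <array>
#include <cstdint>

using V16I8 = std::array<uint8_t, 16>;

// Scalar model of PSHUFB: bit 7 set in the mask byte yields zero, otherwise
// bits 3..0 select the source byte.
static V16I8 pshufb(const V16I8 &Src, const V16I8 &Mask) {
  V16I8 Out{};
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = (Mask[i] & 0x80) ? 0 : Src[Mask[i] & 0x0F];
  return Out;
}

// A two-input byte shuffle is then two pshufb results ORed together, each
// mask zeroing the lanes that come from the other source, as the comment
// about setting the high bit describes.
static V16I8 shuffleTwoInputs(const V16I8 &V1, const V16I8 &V2,
                              const V16I8 &M1, const V16I8 &M2) {
  V16I8 A = pshufb(V1, M1), B = pshufb(V2, M2), Out{};
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = A[i] | B[i];
  return Out;
}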
+static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + int HalfSize = NumElems/2; + SmallVector<int, 16> M; + SVOp->getMask(M); + bool MatchA = false, MatchB = false; + + for (int l = 0; l < NumElems*2; l += HalfSize) { + if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) { + MatchA = true; + break; + } + } + + for (int l = 0; l < NumElems*2; l += HalfSize) { + if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) { + MatchB = true; + break; + } + } + + return MatchA && MatchB; +} + +/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles +/// which could not be matched by any known target speficic shuffle +static SDValue +LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + if (areShuffleHalvesWithinDisjointLanes(SVOp)) { + // If each half of a vector shuffle node referes to only one lane in the + // source vectors, extract each used 128-bit lane and shuffle them using + // 128-bit shuffles. Then, concatenate the results. Otherwise leave + // the work to the legalizer. + DebugLoc dl = SVOp->getDebugLoc(); + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + int HalfSize = NumElems/2; + + // Extract the reference for each half + int FstVecExtractIdx = 0, SndVecExtractIdx = 0; + int FstVecOpNum = 0, SndVecOpNum = 0; + for (int i = 0; i < HalfSize; ++i) { + int Elt = SVOp->getMaskElt(i); + if (SVOp->getMaskElt(i) < 0) + continue; + FstVecOpNum = Elt/NumElems; + FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize; + break; + } + for (int i = HalfSize; i < NumElems; ++i) { + int Elt = SVOp->getMaskElt(i); + if (SVOp->getMaskElt(i) < 0) + continue; + SndVecOpNum = Elt/NumElems; + SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize; + break; + } + + // Extract the subvectors + SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum), + DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl); + SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum), + DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl); + + // Generate 128-bit shuffles + SmallVector<int, 16> MaskV1, MaskV2; + for (int i = 0; i < HalfSize; ++i) { + int Elt = SVOp->getMaskElt(i); + MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize); + } + for (int i = HalfSize; i < NumElems; ++i) { + int Elt = SVOp->getMaskElt(i); + MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize); + } + + EVT NVT = V1.getValueType(); + V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]); + V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]); + + // Concatenate the result back + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + + return SDValue(); +} + +/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with +/// 4 elements, and match them with several different shuffle types. 
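LowerVECTOR_SHUFFLE_256 above handles the case where each half of the result mask stays within one 128-bit lane: it picks the source operand and lane feeding each half, remaps the indices with Elt % HalfSize, shuffles the two extracted halves independently, and concatenates. A compact model of the per-half bookkeeping (indices only, no DAG nodes):

#include <vector>

struct HalfPlan {
  int SrcOperand;              // 0 = V1, 1 = V2
  int LaneBase;                // 0 = low 128-bit lane, HalfSize = high lane
  std::vector<int> LocalMask;  // mask remapped into the extracted half
};

// Mirror of the loops in LowerVECTOR_SHUFFLE_256 for one half of the result.
// Mask uses indices in [0, 2*NumElems) and -1 for undef.
static HalfPlan planHalf(const std::vector<int> &Mask, int Begin, int End,
                         int NumElems) {
  const int HalfSize = NumElems / 2;
  HalfPlan P{0, 0, {}};
  // The first defined index decides which source and 128-bit lane feeds
  // this half of the result.
  for (int i = Begin; i != End; ++i) {
    int Elt = Mask[i];
    if (Elt < 0)
      continue;
    P.SrcOperand = Elt / NumElems;
    P.LaneBase = (Elt % NumElems) < HalfSize ? 0 : HalfSize;
    break;
  }
  // Remap every index into the extracted 128-bit half.
  for (int i = Begin; i != End; ++i) {
    int Elt = Mask[i];
    P.LocalMask.push_back(Elt < 0 ? Elt : Elt % HalfSize);
  }
  return P;
}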
static SDValue -LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { +LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); EVT VT = SVOp->getValueType(0); + assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); + SmallVector<std::pair<int, int>, 8> Locs; Locs.resize(4); SmallVector<int, 8> Mask1(4U, -1); @@ -5542,18 +6275,21 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { static SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, - bool HasSSE2) { + bool HasXMMInt) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); EVT VT = Op.getValueType(); assert(VT != MVT::v2i64 && "unsupported shuffle type"); - if (HasSSE2 && VT == MVT::v2f64) + if (HasXMMInt && VT == MVT::v2f64) return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG); - // v4f32 or v4i32 - return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG); + // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1) + return DAG.getNode(ISD::BITCAST, dl, VT, + getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1), + DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG)); } static @@ -5572,8 +6308,24 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) { return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG); } +static inline unsigned getSHUFPOpcode(EVT VT) { + switch(VT.getSimpleVT().SimpleTy) { + case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8f32: + case MVT::v4i32: // Use fp unit for int unpack. + case MVT::v4f32: return X86ISD::SHUFPS; + case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4f64: + case MVT::v2i64: // Use fp unit for int unpack. + case MVT::v2f64: return X86ISD::SHUFPD; + default: + llvm_unreachable("Unknown type for shufp*"); + } + return 0; +} + static -SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { +SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); EVT VT = Op.getValueType(); @@ -5602,7 +6354,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { CanFoldLoad = false; if (CanFoldLoad) { - if (HasSSE2 && NumElems == 2) + if (HasXMMInt && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) @@ -5616,28 +6368,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // this is horrible, but will stay like this until we move all shuffle // matching to x86 specific nodes. Note that for the 1st condition all // types are matched with movsd. - if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp)) - return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); - else if (HasSSE2) + if (HasXMMInt) { + // FIXME: isMOVLMask should be checked and matched before getMOVLP, + // as to remove this logic from here, as much as possible + if (NumElems == 2 || !X86::isMOVLMask(SVOp)) + return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG); - + } assert(VT != MVT::v4i32 && "unsupported shuffle type"); // Invert the operand order and use SHUFPS to match it. 
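getSHUFPOpcode above routes both float and integer types to SHUFPS/SHUFPD, and several call sites feed it X86::getShuffleSHUFImmediate. For reference, a scalar model of how a SHUFPS immediate selects elements (two from the first source into the low half, two from the second into the high half); this describes the instruction's documented behaviour, not code from the patch:

#include <array>
#include <cstdint>

using V4F32 = std::array<float, 4>;

// Scalar model of SHUFPS dst, a, b, imm:
//   dst[0] = a[imm & 3],        dst[1] = a[(imm >> 2) & 3],
//   dst[2] = b[(imm >> 4) & 3], dst[3] = b[(imm >> 6) & 3].
static V4F32 shufps(const V4F32 &A, const V4F32 &B, uint8_t Imm) {
  V4F32 Dst;
  Dst[0] = A[Imm & 3];
  Dst[1] = A[(Imm >> 2) & 3];
  Dst[2] = B[(Imm >> 4) & 3];
  Dst[3] = B[(Imm >> 6) & 3];
  return Dst;
}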
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1, + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1, X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { +static inline unsigned getUNPCKLOpcode(EVT VT) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: - return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; - case MVT::v2f64: - return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; @@ -5653,6 +6407,10 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; default: @@ -5661,6 +6419,68 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { return 0; } +static inline unsigned getVPERMILOpcode(EVT VT) { + switch(VT.getSimpleVT().SimpleTy) { + case MVT::v4i32: + case MVT::v4f32: return X86ISD::VPERMILPS; + case MVT::v2i64: + case MVT::v2f64: return X86ISD::VPERMILPD; + case MVT::v8i32: + case MVT::v8f32: return X86ISD::VPERMILPSY; + case MVT::v4i64: + case MVT::v4f64: return X86ISD::VPERMILPDY; + default: + llvm_unreachable("Unknown type for vpermil"); + } + return 0; +} + +/// isVectorBroadcast - Check if the node chain is suitable to be xformed to +/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming +/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded. +static bool isVectorBroadcast(SDValue &Op) { + EVT VT = Op.getValueType(); + bool Is256 = VT.getSizeInBits() == 256; + + assert((VT.getSizeInBits() == 128 || Is256) && + "Unsupported type for vbroadcast node"); + + SDValue V = Op; + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (Is256 && !(V.hasOneUse() && + V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(0).getOpcode() == ISD::UNDEF)) + return false; + + if (Is256) + V = V.getOperand(1); + + if (!V.hasOneUse()) + return false; + + // Check the source scalar_to_vector type. 256-bit broadcasts are + // supported for 32/64-bit sizes, while 128-bit ones are only supported + // for 32-bit scalars. 
+ if (V.getOpcode() != ISD::SCALAR_TO_VECTOR) + return false; + + unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits(); + if (ScalarSize != 32 && ScalarSize != 64) + return false; + if (!Is256 && ScalarSize == 64) + return false; + + V = V.getOperand(0); + if (!MayFoldLoad(V)) + return false; + + // Return the load node + Op = V; + return true; +} + static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -5672,23 +6492,29 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, SDValue V2 = Op.getOperand(1); if (isZeroShuffle(SVOp)) - return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl); // Handle splat operations if (SVOp->isSplat()) { - // Special case, this is the only place now where it's - // allowed to return a vector_shuffle operation without - // using a target specific node, because *hopefully* it - // will be optimized away by the dag combiner. - if (VT.getVectorNumElements() <= 4 && - CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) + unsigned NumElem = VT.getVectorNumElements(); + int Size = VT.getSizeInBits(); + // Special case, this is the only place now where it's allowed to return + // a vector_shuffle operation without using a target specific node, because + // *hopefully* it will be optimized away by the dag combiner. FIXME: should + // this be moved to DAGCombine instead? + if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) return Op; - // Handle splats by matching through known masks - if (VT.getVectorNumElements() <= 4) + // Use vbroadcast whenever the splat comes from a foldable load + if (Subtarget->hasAVX() && isVectorBroadcast(V1)) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1); + + // Handle splats by matching through known shuffle masks + if ((Size == 128 && NumElem <= 4) || + (Size == 256 && NumElem < 8)) return SDValue(); - // Canonicalize all of the remaining to v4f32. + // All remaning splats are promoted to target supported vector shuffles. return PromoteSplat(SVOp, DAG); } @@ -5698,7 +6524,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); - } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { + } else if ((VT == MVT::v4i32 || + (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { @@ -5731,9 +6558,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX(); - bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX(); - bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX(); + bool HasXMMInt = Subtarget->hasXMMInt(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); @@ -5765,21 +6590,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
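isVectorBroadcast above only folds a splat into VBROADCAST when the splatted scalar comes from a foldable load and its width fits the destination: 32- or 64-bit scalars for 256-bit results, 32-bit only for 128-bit results. The size rule on its own, as a small predicate modelled on those checks (not an LLVM helper):

// VectorBits is the destination width (128 or 256), ScalarBits the width of
// the loaded scalar, matching the checks in isVectorBroadcast.
static bool broadcastSizeIsLegal(unsigned VectorBits, unsigned ScalarBits) {
  if (ScalarBits != 32 && ScalarBits != 64)
    return false;                      // only dword/qword broadcasts exist
  if (VectorBits == 128 && ScalarBits == 64)
    return false;                      // 128-bit form only takes 32-bit scalars
  return VectorBits == 128 || VectorBits == 256;
}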
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); - if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && - RelaxedMayFoldVectorLoad(V1)) + if (X86::isMOVDDUPMask(SVOp) && + (Subtarget->hasSSE3() || Subtarget->hasAVX()) && + V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); if (X86::isMOVHLPS_v_undef_Mask(SVOp)) return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -5792,24 +6616,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); - if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) + if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32)) return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); - if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) - return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1, - TargetMask, DAG); - - if (VT == MVT::v4f32) - return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1, - TargetMask, DAG); + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1, + TargetMask, DAG); } // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasXMMInt() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -5824,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); if (!X86::isMOVLPMask(SVOp)) { - if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) + if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); if (VT == MVT::v4i32 || VT == MVT::v4f32) @@ -5834,19 +6653,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // FIXME: fold these into legal mask. 
if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + if (X86::isMOVSHDUPMask(SVOp, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + if (X86::isMOVSLDUPMask(SVOp, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); + return getMOVLP(Op, dl, DAG, HasXMMInt); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5887,8 +6706,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5915,8 +6733,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); @@ -5932,18 +6749,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - if (isPALIGNRMask(M, VT, HasSSSE3)) + if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, X86::getShufflePALIGNRImmediate(SVOp), DAG); if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) { - X86ISD::NodeType Opcode = - getSubtarget()->hasAVX() ? 
X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; - return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); - } + if (VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -5958,23 +6772,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShufflePSHUFLWImmediate(SVOp), DAG); - if (isSHUFPMask(M, VT)) { - unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); - if (VT == MVT::v4f32 || VT == MVT::v4i32) - return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2, - TargetMask, DAG); - if (VT == MVT::v2f64 || VT == MVT::v2i64) - return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2, - TargetMask, DAG); - } + if (isSHUFPMask(M, VT)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + X86::getShuffleSHUFImmediate(SVOp), DAG); if (X86::isUNPCKL_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + + //===--------------------------------------------------------------------===// + // Generate target specific nodes for 128 or 256-bit shuffles only + // supported in the AVX instruction set. + // + + // Handle VMOVDDUPY permutations + if (isMOVDDUPYMask(SVOp, Subtarget)) + return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); + + // Handle VPERMILPS* permutations + if (isVPERMILPSMask(M, VT, Subtarget)) + return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, + getShuffleVPERMILPSImmediate(SVOp), DAG); + + // Handle VPERMILPD* permutations + if (isVPERMILPDMask(M, VT, Subtarget)) + return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, + getShuffleVPERMILPDImmediate(SVOp), DAG); + + // Handle VPERM2F128 permutations + if (isVPERM2F128Mask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, + getShuffleVPERM2F128Immediate(SVOp), DAG); + + // Handle VSHUFPSY permutations + if (isVSHUFPSYMask(M, VT, Subtarget)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + getShuffleVSHUFPSYImmediate(SVOp), DAG); + + // Handle VSHUFPDY permutations + if (isVSHUFPDYMask(M, VT, Subtarget)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + getShuffleVSHUFPDYImmediate(SVOp), DAG); + + //===--------------------------------------------------------------------===// + // Since no target specific shuffle was selected for this generic one, + // lower it into other known shuffles. FIXME: this isn't true yet, but + // this is the plan. + // // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { @@ -5989,9 +6834,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles. - if (NumElems == 4) - return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); + // Handle all 128-bit wide vectors with 4 elements, and match them with + // several different shuffle types. 
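The new AVX-only block above matches VPERMILPS*/VPERMILPD* and VPERM2F128 masks and emits them with an immediate. As a reading aid, a scalar model of how those immediates select elements, based on the instructions' documented behaviour rather than code in the patch (the zeroing bits of VPERM2F128 are omitted here):

#include <array>
#include <cstdint>

using V8F32 = std::array<float, 8>;

// VPERMILPS ymm, ymm, imm8: the same 2-bit selectors are applied within
// each 128-bit lane: dst[lane*4 + i] = src[lane*4 + ((imm >> (2*i)) & 3)].
static V8F32 vpermilps(const V8F32 &Src, uint8_t Imm) {
  V8F32 Dst{};
  for (unsigned i = 0; i != 8; ++i) {
    unsigned LaneBase = (i / 4) * 4;
    Dst[i] = Src[LaneBase + ((Imm >> (2 * (i % 4))) & 3)];
  }
  return Dst;
}

// VPERM2F128 dst, a, b, imm8 (selection bits only): bits 1:0 pick the
// 128-bit half feeding dst's low lane, bits 5:4 the half feeding its high
// lane, out of {a.lo, a.hi, b.lo, b.hi}.
static V8F32 vperm2f128(const V8F32 &A, const V8F32 &B, uint8_t Imm) {
  V8F32 Dst{};
  for (unsigned Half = 0; Half != 2; ++Half) {
    unsigned Sel = (Imm >> (4 * Half)) & 3;     // 2-bit selector per half
    const V8F32 &Src = (Sel & 2) ? B : A;
    unsigned Base = (Sel & 1) ? 4 : 0;
    for (unsigned i = 0; i != 4; ++i)
      Dst[Half * 4 + i] = Src[Base + i];
  }
  return Dst;
}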
+ if (NumElems == 4 && VT.getSizeInBits() == 128) + return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); + + // Handle general 256-bit shuffles + if (VT.is256BitVector()) + return LowerVECTOR_SHUFFLE_256(SVOp, DAG); return SDValue(); } @@ -6001,6 +6851,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); + + if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + return SDValue(); + if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); @@ -6060,36 +6914,26 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = Op.getOperand(0); EVT VecVT = Vec.getValueType(); - // If this is a 256-bit vector result, first extract the 128-bit - // vector and then extract from the 128-bit vector. - if (VecVT.getSizeInBits() > 128) { + // If this is a 256-bit vector result, first extract the 128-bit vector and + // then extract the element from the 128-bit vector. + if (VecVT.getSizeInBits() == 256) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); - - if (!isa<ConstantSDNode>(Idx)) - return SDValue(); - - unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128); unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); // Get the 128-bit vector. - bool Upper = IdxVal >= ExtractNumElems; - Vec = Extract128BitVector(Vec, Idx, DAG, dl); - - // Extract from it. - SDValue ScaledIdx = Idx; - if (Upper) - ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx, - DAG.getConstant(ExtractNumElems, - Idx.getValueType())); + bool Upper = IdxVal >= NumElems/2; + Vec = Extract128BitVector(Vec, + DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, - ScaledIdx); + Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx); } assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41()) { + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -6120,7 +6964,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Op; // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { Idx, -1, -1, -1 }; + int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 }; EVT VVT = Op.getOperand(0).getValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); @@ -6159,6 +7003,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); + if (VT.getSizeInBits() == 256) + return SDValue(); + if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { unsigned Opc; @@ -6206,35 +7053,28 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - // If this is a 256-bit vector result, first insert into a 128-bit - // vector and then insert into the 256-bit vector. - if (VT.getSizeInBits() > 128) { + // If this is a 256-bit vector result, first extract the 128-bit vector, + // insert the element into the extracted half and then place it back. + if (VT.getSizeInBits() == 256) { if (!isa<ConstantSDNode>(N2)) return SDValue(); - // Get the 128-bit vector. + // Get the desired 128-bit vector half. 
unsigned NumElems = VT.getVectorNumElements(); unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue(); - bool Upper = IdxVal >= NumElems / 2; - - SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl); + bool Upper = IdxVal >= NumElems/2; + SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32); + SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl); - // Insert into it. - SDValue ScaledN2 = N2; - if (Upper) - ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / - (VT.getSizeInBits() / 128), - N2.getValueType())); - Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, - N1, ScaledN2); + // Insert the element into the desired half. + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, + N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2); - // Insert the 128-bit vector - // FIXME: Why UNDEF? - return Insert128BitVector(N0, Op, N2, DAG, dl); + // Insert the changed part back to the 256-bit vector + return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41()) + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -6405,12 +7245,17 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { CodeModel::Model M = getTargetMachine().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && - (M == CodeModel::Small || M == CodeModel::Kernel)) + (M == CodeModel::Small || M == CodeModel::Kernel)) { + if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF()) + OpFlag = X86II::MO_GOTPCREL; WrapperKind = X86ISD::WrapperRIP; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleStubPIC()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOT; + } else if (Subtarget->isPICStyleStubPIC()) { + OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE; + } else if (Subtarget->isPICStyleStubNoDynamic()) { + OpFlag = X86II::MO_DARWIN_NONLAZY; + } SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); @@ -6427,6 +7272,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { Result); } + // For symbols that require a load from a stub to get the address, emit the + // load. + if (isGlobalStubReference(OpFlag)) + Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, 0); + return Result; } @@ -6676,7 +7527,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), + Chain.getValue(1)); } assert(false && @@ -6922,9 +7774,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, // Load the 32-bit value into an XMM register. SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, - DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Op.getOperand(0), - DAG.getIntPtrConstant(0))); + Op.getOperand(0)); + + // Zero out the upper parts of the register. 
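The EXTRACT/INSERT_VECTOR_ELT changes above reduce a 256-bit element access to the matching 128-bit half: the half is chosen by comparing the index against NumElems/2 and the index is rebased into that half. The index arithmetic in isolation:

struct HalfIndex {
  unsigned HalfStart;  // 0 or NumElems/2: first element of the chosen half
  unsigned LocalIdx;   // element index within that 128-bit half
};

// Map a wide-vector element index onto (half, index-within-half), as done
// for both the extract and the insert paths above.
static HalfIndex splitIndex(unsigned NumElems, unsigned IdxVal) {
  bool Upper = IdxVal >= NumElems / 2;
  return {Upper ? NumElems / 2 : 0u,
          Upper ? IdxVal - NumElems / 2 : IdxVal};
}

// e.g. NumElems = 8, IdxVal = 5  ->  HalfStart = 4, LocalIdx = 1.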
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(), + DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), @@ -7513,6 +8367,9 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + + if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); + assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -7563,6 +8420,39 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(X86CC, MVT::i8), EFLAGS); } +// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 +// ones, and then concatenate the result back. +static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + + assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC && + "Unsupported value type for operation"); + + int NumElems = VT.getVectorNumElements(); + DebugLoc dl = Op.getDebugLoc(); + SDValue CC = Op.getOperand(2); + SDValue Idx0 = DAG.getConstant(0, MVT::i32); + SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + + // Extract the LHS vectors + SDValue LHS = Op.getOperand(0); + SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + + // Extract the RHS vectors + SDValue RHS = Op.getOperand(1); + SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl); + SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl); + + // Issue the operation on the smaller types and concatenate the result back + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); +} + + SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); @@ -7575,11 +8465,21 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (isFP) { unsigned SSECC = 8; - EVT VT0 = Op0.getValueType(); - assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); - unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD; + EVT EltVT = Op0.getValueType().getVectorElementType(); + assert(EltVT == MVT::f32 || EltVT == MVT::f64); + + unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD; bool Swap = false; + // SSE Condition code mapping: + // 0 - EQ + // 1 - LT + // 2 - LE + // 3 - UNORD + // 4 - NEQ + // 5 - NLT + // 6 - NLE + // 7 - ORD switch (SetCCOpcode) { default: break; case ISD::SETOEQ: @@ -7624,6 +8524,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); } + // Break 256-bit integer vector compare into smaller ones. + if (!isFP && VT.getSizeInBits() == 256) + return Lower256IntVSETCC(Op, DAG); + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. 
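LowerVSETCC above picks CMPPS/CMPPD immediates according to the SSE condition-code table spelled out in the new comment (0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD). That table as data, useful when reading the switch that follows it; the enum is illustrative and unrelated to LLVM's ISD condition codes:

#include <cstdint>

// Packed-compare predicate immediates as listed in the comment above;
// CMPPS/CMPPD take one of these in their imm8 operand.
enum SSECmpImm : uint8_t {
  SSE_CMP_EQ    = 0,  // equal
  SSE_CMP_LT    = 1,  // less-than
  SSE_CMP_LE    = 2,  // less-or-equal
  SSE_CMP_UNORD = 3,  // unordered (either operand is NaN)
  SSE_CMP_NEQ   = 4,  // not-equal
  SSE_CMP_NLT   = 5,  // not-less-than
  SSE_CMP_NLE   = 6,  // not-less-or-equal
  SSE_CMP_ORD   = 7   // ordered (neither operand is NaN)
};

// Greater-than style predicates have no direct encoding; as in the lowering
// above, they are reached by swapping the operands and using LT/LE.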
@@ -7654,6 +8558,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (Swap) std::swap(Op0, Op1); + // Check that the operation in question is available (most are plain SSE2, + // but PCMPGTQ and PCMPEQQ have different requirements). + if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX()) + return SDValue(); + if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX()) + return SDValue(); + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { @@ -8014,9 +8925,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && - "This should be used only on Windows targets"); - assert(!Subtarget->isTargetEnvMacho()); + assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() || + EnableSegmentedStacks) && + "This should be used only on Windows targets or when segmented stacks " + "are being used"); + assert(!Subtarget->isTargetEnvMacho() && "Not implemented"); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -8024,23 +8937,49 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); // FIXME: Ensure alignment here - SDValue Flag; + bool Is64Bit = Subtarget->is64Bit(); + EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; - EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; - unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); + if (EnableSegmentedStacks) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); - Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); - Flag = Chain.getValue(1); + if (Is64Bit) { + // The 64 bit implementation of segmented stacks needs to clobber both r10 + // r11. This makes it impossible to use it along with nested parameters. + const Function *F = MF.getFunction(); + + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; I++) + if (I->hasNestAttr()) + report_fatal_error("Cannot use segmented stacks with functions that " + "have nested arguments."); + } + + const TargetRegisterClass *AddrRegClass = + getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32); + unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); + Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); + SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + DAG.getRegister(Vreg, SPTy)); + SDValue Ops1[2] = { Value, Chain }; + return DAG.getMergeValues(Ops1, 2, dl); + } else { + SDValue Flag; + unsigned Reg = (Subtarget->is64Bit() ? 
X86::RAX : X86::EAX); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); + Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); - Flag = Chain.getValue(1); + Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); - SDValue Ops1[2] = { Chain.getValue(0), Chain }; - return DAG.getMergeValues(Ops1, 2, dl); + SDValue Ops1[2] = { Chain.getValue(0), Chain }; + return DAG.getMergeValues(Ops1, 2, dl); + } } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -8118,7 +9057,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT ArgVT = Op.getNode()->getValueType(0); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy); uint8_t ArgMode; @@ -8292,6 +9231,19 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // Arithmetic intrinsics. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -8535,8 +9487,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } -SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -8552,8 +9509,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. 
- const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); - const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); + const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10); + const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11); const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix @@ -8600,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 22), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6); } else { const Function *Func = cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); @@ -8619,7 +9574,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, NestReg = X86::ECX; // Check that ECX wasn't needed by an 'inreg' parameter. - const FunctionType *FTy = Func->getFunctionType(); + FunctionType *FTy = Func->getFunctionType(); const AttrListPtr &Attrs = Func->getAttributes(); if (!Attrs.isEmpty() && !Func->isVarArg()) { @@ -8657,7 +9612,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. - const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); + const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr), @@ -8682,9 +9637,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 6), false, false, 1); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4); } } @@ -8822,8 +9775,58 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { return Op; } -SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { +// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit +// ones, and then concatenate the result back. 
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + + assert(VT.getSizeInBits() == 256 && VT.isInteger() && + "Unsupported value type for operation"); + + int NumElems = VT.getVectorNumElements(); + DebugLoc dl = Op.getDebugLoc(); + SDValue Idx0 = DAG.getConstant(0, MVT::i32); + SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + + // Extract the LHS vectors + SDValue LHS = Op.getOperand(0); + SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + + // Extract the RHS vectors + SDValue RHS = Op.getOperand(1); + SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl); + SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl); + + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); +} + +SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType().getSizeInBits() == 256 && + Op.getValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + +SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType().getSizeInBits() == 256 && + Op.getValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + +SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + + // Decompose 256-bit ops into smaller 128-bit ops. + if (VT.getSizeInBits() == 256) + return Lower256IntArith(Op, DAG); + assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -8872,11 +9875,51 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - LLVMContext *Context = DAG.getContext(); - // Must have SSE2. - if (!Subtarget->hasSSE2()) return SDValue(); + if (!Subtarget->hasXMMInt()) + return SDValue(); + + // Decompose 256-bit shifts into smaller 128-bit shifts. 
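Lower256IntArith above is the generic splitter: it extracts the low and high 128-bit halves of both operands, runs the original opcode on each half, and concatenates the results; the 256-bit shift decomposition that follows applies the same idea to the shift amounts as well. A self-contained scalar sketch of the split, treating 8 x i32 as two 4 x i32 halves (names here are illustrative only):

    #include <array>
    #include <cstdio>

    // Illustrative model of Lower256IntArith: a 256-bit (8 x i32) operation
    // is performed as two independent 128-bit (4 x i32) halves whose results
    // are concatenated back together.
    template <typename Op>
    static std::array<int, 8> lower256(const std::array<int, 8> &lhs,
                                       const std::array<int, 8> &rhs, Op op) {
      std::array<int, 8> result{};
      for (int half = 0; half < 2; ++half)        // low half, then high half
        for (int i = 0; i < 4; ++i) {
          int idx = half * 4 + i;
          result[idx] = op(lhs[idx], rhs[idx]);   // the 128-bit sub-operation
        }
      return result;
    }

    int main() {
      std::array<int, 8> a = {1, 2, 3, 4, 5, 6, 7, 8};
      std::array<int, 8> b = {8, 7, 6, 5, 4, 3, 2, 1};
      std::array<int, 8> sum = lower256(a, b, [](int x, int y) { return x + y; });
      std::printf("%d ... %d\n", sum[0], sum[7]);   // 9 ... 9
      return 0;
    }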
+ if (VT.getSizeInBits() == 256) { + int NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + // Extract the two vectors + SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + + // Recreate the shift amount vectors + SDValue Amt1, Amt2; + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + // Constant shift amount + SmallVector<SDValue, 4> Amt1Csts; + SmallVector<SDValue, 4> Amt2Csts; + for (int i = 0; i < NumElems/2; ++i) + Amt1Csts.push_back(Amt->getOperand(i)); + for (int i = NumElems/2; i < NumElems; ++i) + Amt2Csts.push_back(Amt->getOperand(i)); + + Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt1Csts[0], NumElems/2); + Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt2Csts[0], NumElems/2); + } else { + // Variable shift amount + Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); + Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + + // Issue new vector shifts for the smaller types + V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); + V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); + + // Concatenate the result back + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); + } // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { @@ -8927,9 +9970,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Lower SHL with variable shift amount. - // Cannot lower SHL without SSE2 or later. - if (!Subtarget->hasSSE2()) return SDValue(); - if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), @@ -8971,7 +10011,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -8986,13 +10026,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, - R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, + R, DAG.getNode(ISD::ADD, dl, VT, R, R)); return R; } return SDValue(); @@ -9057,8 +10097,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(X86::COND_O, MVT::i32), SDValue(Sum.getNode(), 2)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); - return Sum; + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } } @@ -9071,8 +10110,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); - return Sum; + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } SDValue 
X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ @@ -9080,8 +10118,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) SDNode* Node = Op.getNode(); EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - - if (Subtarget->hasSSE2() && VT.isVector()) { + if (Subtarget->hasXMMInt() && VT.isVector()) { unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); @@ -9091,11 +10128,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) switch (VT.getSimpleVT().SimpleTy) { default: return SDValue(); - case MVT::v2i64: { - SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q; - SRAIntrinsicsID = 0; - break; - } case MVT::v4i32: { SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; @@ -9115,12 +10147,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) // In case of 1 bit sext, no need to shr if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; - if (SRAIntrinsicsID) { - Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(SRAIntrinsicsID, MVT::i32), - Tmp1, ShAmt); - } - return Tmp1; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SRAIntrinsicsID, MVT::i32), + Tmp1, ShAmt); } return SDValue(); @@ -9132,7 +10161,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { + if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { @@ -9172,6 +10201,45 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); } +SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + SynchronizationScope FenceScope = static_cast<SynchronizationScope>( + cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { + // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for + // no-sse2). There isn't any reason to disable it if the target processor + // supports it. + if (Subtarget->hasXMMInt() || Subtarget->is64Bit()) + return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); + + SDValue Chain = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, MVT::i32); + SDValue Ops[] = { + DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(0, MVT::i32), // Disp + DAG.getRegister(0, MVT::i32), // Segment. + Zero, + Chain + }; + SDNode *Res = + DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, + array_lengthof(Ops)); + return SDValue(Res, 0); + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); @@ -9227,7 +10295,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); - assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && + assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && @@ -9255,7 +10323,34 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { Node->getOperand(0), Node->getOperand(1), negOp, cast<AtomicSDNode>(Node)->getSrcValue(), - cast<AtomicSDNode>(Node)->getAlignment()); + cast<AtomicSDNode>(Node)->getAlignment(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); +} + +static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT(); + + // Convert seq_cst store -> xchg + // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) + // FIXME: On 32-bit, store -> fist or movq would be more efficient + // (The only way to get a 16-byte store is cmpxchg16b) + // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. + if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent || + !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + return Swap.getValue(1); + } + // Other atomic stores have a simple pattern. 
+ return Op; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { @@ -9291,8 +10386,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); + case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -9318,7 +10415,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); @@ -9332,11 +10428,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); - case ISD::MUL: return LowerMUL_V2I64(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, DAG); @@ -9352,15 +10449,38 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADD: return LowerADD(Op, DAG); + case ISD::SUB: return LowerSUB(Op, DAG); } } +static void ReplaceATOMIC_LOAD(SDNode *Node, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + DebugLoc dl = Node->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT(); + + // Convert wide load -> cmpxchg8b/cmpxchg16b + // FIXME: On 32-bit, load -> fild or movq would be more efficient + // (The only way to get a 16-byte load is cmpxchg16b) + // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment. 
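The ATOMIC_STORE and ATOMIC_LOAD handling here follows the usual x86 mapping: a sequentially-consistent store needs a full barrier, so it is rewritten as an exchange, and an atomic load wider than a legal register is expressed as a compare-exchange that merely reads back the old value (both expected and new are zero; cmpxchg always returns what was in memory). A rough source-level model of both, for illustration only (the helper name is made up):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    static std::atomic<std::uint64_t> g{42};

    // Model of the wide-load-as-cmpxchg idea: on failure, compare_exchange
    // writes the current value back into 'expected', so the call behaves as
    // a load regardless of whether the exchange happens.
    static std::uint64_t wide_load_via_cmpxchg(std::atomic<std::uint64_t> &v) {
      std::uint64_t expected = 0;
      v.compare_exchange_strong(expected, 0);
      return expected;                       // the value that was in memory
    }

    int main() {
      g.store(7, std::memory_order_seq_cst); // needs xchg (or mov + mfence) on x86
      std::printf("%llu\n",
                  static_cast<unsigned long long>(wide_load_via_cmpxchg(g)));
      return 0;
    }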
+ SDValue Zero = DAG.getConstant(0, VT); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, + Node->getOperand(0), + Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); +} + void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG, unsigned NewOp) const { - EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); - assert (T == MVT::i64 && "Only know how to expand i64 atomics"); + assert (Node->getValueType(0) == MVT::i64 && + "Only know how to expand i64 atomics"); SDValue Chain = Node->getOperand(0); SDValue In1 = Node->getOperand(1); @@ -9423,37 +10543,48 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); - assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); + assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); + bool Regs64bit = T == MVT::i128; + EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; - cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), - DAG.getConstant(0, MVT::i32)); - cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), - DAG.getConstant(1, MVT::i32)); - cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue()); - cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH, - cpInL.getValue(1)); + cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), + DAG.getConstant(0, HalfT)); + cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), + DAG.getConstant(1, HalfT)); + cpInL = DAG.getCopyToReg(N->getOperand(0), dl, + Regs64bit ? X86::RAX : X86::EAX, + cpInL, SDValue()); + cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, + Regs64bit ? X86::RDX : X86::EDX, + cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; - swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), - DAG.getConstant(0, MVT::i32)); - swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), - DAG.getConstant(1, MVT::i32)); - swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL, - cpInH.getValue(1)); - swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH, - swapInL.getValue(1)); + swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), + DAG.getConstant(0, HalfT)); + swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), + DAG.getConstant(1, HalfT)); + swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, + Regs64bit ? X86::RBX : X86::EBX, + swapInL, cpInH.getValue(1)); + swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, + Regs64bit ? X86::RCX : X86::ECX, + swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), swapInH.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, + unsigned Opcode = Regs64bit ? 
X86ISD::LCMPXCHG16_DAG : + X86ISD::LCMPXCHG8_DAG; + SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, 3, T, MMO); - SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX, - MVT::i32, Result.getValue(1)); - SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX, - MVT::i32, cpOutL.getValue(2)); + SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, + Regs64bit ? X86::RAX : X86::EAX, + HalfT, Result.getValue(1)); + SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, + Regs64bit ? X86::RDX : X86::EDX, + HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2)); Results.push_back(cpOutH.getValue(1)); return; } @@ -9478,6 +10609,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_SWAP: ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); return; + case ISD::ATOMIC_LOAD: + ReplaceATOMIC_LOAD(N, Results, DAG); } } @@ -9527,11 +10660,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; - case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::FHADD: return "X86ISD::FHADD"; + case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; @@ -9570,6 +10704,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; + case X86ISD::ANDN: return "X86ISD::ANDN"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -9596,9 +10731,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; - case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; - case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; @@ -9610,16 +10742,24 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; + case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; + case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; + case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; + case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; + case X86ISD::MEMBARRIER: return 
"X86ISD::MEMBARRIER"; + case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; } } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); Reloc::Model R = getTargetMachine().getRelocationModel(); @@ -9671,7 +10811,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, } -bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { +bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -9691,7 +10831,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return true; } -bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { +bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } @@ -9715,7 +10855,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()); + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()); // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -9725,7 +10865,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || isUNPCKLMask(M, VT) || isUNPCKHMask(M, VT) || isUNPCKL_v_undef_Mask(M, VT) || @@ -10158,7 +11298,9 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } - BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + BuildMI(*BB, MI, dl, + TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), + MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -10513,6 +11655,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MBB->addSuccessor(EndMBB); } + unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; @@ -10521,7 +11664,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset), MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); - BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) + BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc)) .addFrameIndex(RegSaveFrameIndex) .addImm(/*Scale=*/1) .addReg(/*IndexReg=*/0) @@ -10565,17 +11708,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. 
- const MachineFunction *MF = BB->getParent(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - BitVector ReservedRegs = TRI->getReservedRegs(*MF); - - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); - if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; - unsigned Reg = MO.getReg(); - if (Reg != X86::EFLAGS) continue; - copy0MBB->addLiveIn(Reg); - sinkMBB->addLiveIn(Reg); + if (!MI->killsRegister(X86::EFLAGS)) { + copy0MBB->addLiveIn(X86::EFLAGS); + sinkMBB->addLiveIn(X86::EFLAGS); } // Transfer the remainder of BB and its successor edges to sinkMBB. @@ -10611,6 +11746,119 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, } MachineBasicBlock * +X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, + bool Is64Bit) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + + assert(EnableSegmentedStacks); + + unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; + unsigned TlsOffset = Is64Bit ? 0x70 : 0x30; + + // BB: + // ... [Till the alloca] + // If stacklet is not large enough, jump to mallocMBB + // + // bumpMBB: + // Allocate by subtracting from RSP + // Jump to continueMBB + // + // mallocMBB: + // Allocate by call to runtime + // + // continueMBB: + // ... + // [rest of original BB] + // + + MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *AddrRegClass = + getRegClassFor(Is64Bit ? MVT::i64:MVT::i32); + + unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), + bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), + tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), + sizeVReg = MI->getOperand(1).getReg(), + physSPReg = Is64Bit ? X86::RSP : X86::ESP; + + MachineFunction::iterator MBBIter = BB; + ++MBBIter; + + MF->insert(MBBIter, bumpMBB); + MF->insert(MBBIter, mallocMBB); + MF->insert(MBBIter, continueMBB); + + continueMBB->splice(continueMBB->begin(), BB, llvm::next + (MachineBasicBlock::iterator(MI)), BB->end()); + continueMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Add code to the main basic block to check if the stack limit has been hit, + // and if so, jump to mallocMBB otherwise to bumpMBB. + BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); + BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg) + .addReg(tmpSPVReg).addReg(sizeVReg); + BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr)) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg) + .addReg(tmpSPVReg); + BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB); + + // bumpMBB simply decreases the stack pointer, since we know the current + // stacklet has enough space. + BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) + .addReg(tmpSPVReg); + BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) + .addReg(tmpSPVReg); + BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); + + // Calls into a routine in libgcc to allocate more space from the heap. 
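EmitLoweredSegAlloca stitches together three blocks: the original block computes SP - size and compares it against the stacklet limit kept in thread-local storage, bumpMBB simply moves the stack pointer when the stacklet still has room, and mallocMBB calls into libgcc's __morestack_allocate_stack_space otherwise. A simulated sketch of that decision, illustrative only (the Machine struct and heap_fallback stand in for machine state and the libgcc call, and the comparison is simplified):

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    struct Machine {
      std::uintptr_t sp;      // stand-in for RSP/ESP
      std::uintptr_t limit;   // stand-in for the TLS stacklet limit
    };

    static void *heap_fallback(std::size_t size) {   // stand-in for the libgcc call
      return std::malloc(size);
    }

    // Try to bump the stack pointer within the current stacklet; otherwise
    // fall back to the runtime allocator (the mallocMBB path above).
    static void *seg_alloca(Machine &m, std::size_t size) {
      std::uintptr_t candidate = m.sp - size;
      if (candidate < m.limit)
        return heap_fallback(size);
      m.sp = candidate;                               // the bumpMBB path
      return reinterpret_cast<void *>(candidate);
    }

    int main() {
      Machine m{0x10000, 0x0F000};
      std::printf("%p\n", seg_alloca(m, 128));        // fits: bump path
      std::printf("%p\n", seg_alloca(m, 0x8000));     // too big: heap path
      return 0;
    }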
+ if (Is64Bit) { + BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) + .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) + .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI); + } else { + BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) + .addImm(12); + BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("__morestack_allocate_stack_space"); + } + + if (!Is64Bit) + BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) + .addImm(16); + + BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) + .addReg(Is64Bit ? X86::RAX : X86::EAX); + BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); + + // Set up the CFG correctly. + BB->addSuccessor(bumpMBB); + BB->addSuccessor(mallocMBB); + mallocMBB->addSuccessor(continueMBB); + bumpMBB->addSuccessor(continueMBB); + + // Take care of the PHI nodes. + BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI), + MI->getOperand(0).getReg()) + .addReg(mallocPtrVReg).addMBB(mallocMBB) + .addReg(bumpSPPtrVReg).addMBB(bumpMBB); + + // Delete the original pseudo instruction. + MI->eraseFromParent(); + + // And we're done. + return continueMBB; +} + +MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -10718,11 +11966,11 @@ MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); + default: assert(0 && "Unexpected instr type to insert"); case X86::TAILJMPd64: case X86::TAILJMPr64: case X86::TAILJMPm64: - assert(!"TAILJMP64 would not be touched here."); + assert(0 && "TAILJMP64 would not be touched here."); case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: @@ -10745,6 +11993,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); + case X86::SEG_ALLOCA_32: + return EmitLoweredSegAlloca(MI, BB, false); + case X86::SEG_ALLOCA_64: + return EmitLoweredSegAlloca(MI, BB, true); case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); @@ -10754,6 +12006,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_V4F32: case X86::CMOV_V2F64: case X86::CMOV_V2I64: + case X86::CMOV_V8F32: + case X86::CMOV_V4F64: + case X86::CMOV_V4I64: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: @@ -11074,6 +12329,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), Mask.getBitWidth() - 1); break; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned NumLoBits = 0; + switch (IntId) { + default: break; + case Intrinsic::x86_sse_movmsk_ps: + case Intrinsic::x86_avx_movmsk_ps_256: + case Intrinsic::x86_sse2_movmsk_pd: + case Intrinsic::x86_avx_movmsk_pd_256: + case Intrinsic::x86_mmx_pmovmskb: + case Intrinsic::x86_sse2_pmovmskb_128: { + // High bits of movmskp{s|d}, pmovmskb are known zero. 
+ switch (IntId) { + case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break; + case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break; + case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break; + case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break; + case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break; + case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; + } + KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), + Mask.getBitWidth() - NumLoBits); + break; + } + } + break; + } } } @@ -11102,23 +12384,132 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -/// PerformShuffleCombine - Combine a vector_shuffle that is equal to -/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load -/// if the load addresses are consecutive, non-overlapping, and in the right -/// order. +/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the +/// same as extracting the high 128-bit part of 256-bit vector and then +/// inserting the result into the low part of a new 256-bit vector +static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + +/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the +/// same as extracting the low 128-bit part of 256-bit vector and then +/// inserting the result into the high part of a new 256-bit vector +static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> + for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + +/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. +static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + DebugLoc dl = N->getDebugLoc(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + if (V1.getOpcode() == ISD::CONCAT_VECTORS && + V2.getOpcode() == ISD::CONCAT_VECTORS) { + // + // 0,0,0,... + // | + // V UNDEF BUILD_VECTOR UNDEF + // \ / \ / + // CONCAT_VECTOR CONCAT_VECTOR + // \ / + // \ / + // RESULT: V + zero extended + // + if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR || + V2.getOperand(1).getOpcode() != ISD::UNDEF || + V1.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode())) + return SDValue(); + + // To match the shuffle mask, the first half of the mask should + // be exactly the first vector, and all the rest a splat with the + // first element of the second one. + for (int i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || + !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) + return SDValue(); + + // Emit a zeroed vector and insert the desired subvector on its + // first half. 
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); + SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), + DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + //===--------------------------------------------------------------------===// + // Combine some shuffles into subvector extracts and inserts: + // + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + if (isShuffleHigh128VectorInsertLow(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> + if (isShuffleLow128VectorInsertHigh(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + return SDValue(); +} + +/// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 128) - return SDValue(); - // Don't create instructions with illegal types after legalize types has run. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) return SDValue(); + // Combine 256-bit vector shuffles. This is only profitable when in AVX mode + if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + N->getOpcode() == ISD::VECTOR_SHUFFLE) + return PerformShuffleCombine256(N, DAG, DCI); + + // Only handle 128 wide vector from here on. + if (VT.getSizeInBits() != 128) + return SDValue(); + + // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, + // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are + // consecutive, non-overlapping, and in the right order. SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); @@ -11209,7 +12600,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. +/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT +/// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); @@ -11217,14 +12609,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); + EVT VT = LHS.getValueType(); // If we have SSE[12] support, try to form min/max nodes. SSE min/max // instructions match the semantics of the common C idiom x<y?x:y but not // x<=y?x:y, because of how they handle negative zero (which can be // ignored in unsafe-math mode). 
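The negative-zero caveat in the comment above is easy to demonstrate: MINSS computes x < y ? x : y, so only the strict-compare idiom can be turned into it; the non-strict form x <= y ? x : y disagrees on the sign of zero. A small illustrative check:

    #include <cmath>
    #include <cstdio>

    // strict_min matches what MINSS computes; non_strict_min does not, and
    // the two differ when the operands are zeros of opposite sign.
    static float strict_min(float x, float y) { return x < y ? x : y; }
    static float non_strict_min(float x, float y) { return x <= y ? x : y; }

    int main() {
      float x = +0.0f, y = -0.0f;
      std::printf("strict:     signbit=%d\n", std::signbit(strict_min(x, y)));     // 1 (-0.0)
      std::printf("non-strict: signbit=%d\n", std::signbit(non_strict_min(x, y))); // 0 (+0.0)
      return 0;
    }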
- if (Subtarget->hasSSE2() && - (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && - Cond.getOpcode() == ISD::SETCC) { + if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && + VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) && + (Subtarget->hasXMMInt() || + (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); unsigned Opcode = 0; @@ -11267,7 +12661,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle comparisons between positive // and negative zero incorrectly. if (!UnsafeFPMath && - !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMAX; break; @@ -11680,7 +13074,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, // all elements are shifted by the same amount. We can't do this in legalize // because the a constant vector is typically transformed to a constant pool // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) + if (!Subtarget->hasXMMInt()) return SDValue(); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) @@ -11796,7 +13190,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. - if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { + if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); @@ -11864,6 +13258,36 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector +/// so it can be folded inside ANDNP. +static bool CanFoldXORWithAllOnes(const SDNode *N) { + EVT VT = N->getValueType(0); + + // Match direct AllOnes for 128 and 256-bit vectors + if (ISD::isBuildVectorAllOnes(N)) + return true; + + // Look through a bit convert. 
+ if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + // Sometimes the operand may come from a insert_subvector building a 256-bit + // allones vector + if (VT.getSizeInBits() == 256 && + N->getOpcode() == ISD::INSERT_SUBVECTOR) { + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + + if (V1.getOpcode() == ISD::INSERT_SUBVECTOR && + V1.getOperand(0).getOpcode() == ISD::UNDEF && + ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) && + ISD::isBuildVectorAllOnes(V2.getNode())) + return true; + } + + return false; +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -11874,11 +13298,28 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; + EVT VT = N->getValueType(0); + + // Create ANDN instructions + if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Check LHS for not + if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1); + // Check RHS for not + if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1))) + return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0); + + return SDValue(); + } + // Want to form ANDNP nodes: // 1) In the hopes of then easily combining them with OR and AND nodes // to form PBLEND/PSIGN. // 2) To match ANDN packed intrinsics - EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); @@ -11888,12 +13329,14 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + CanFoldXORWithAllOnes(N0.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + CanFoldXORWithAllOnes(N1.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); @@ -11917,7 +13360,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); // look for psign/blend - if (Subtarget->hasSSSE3()) { + if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { if (VT == MVT::v2i64) { // Canonicalize pandn to RHS if (N0.getOpcode() == X86ISD::ANDNP) @@ -11990,13 +13433,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } } // PBLENDVB only available on SSE 4.1 - if (!Subtarget->hasSSE41()) + if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) return SDValue(); X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y); return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); } } @@ -12057,24 +13500,211 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. 
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + LoadSDNode *Ld = cast<LoadSDNode>(N); + EVT RegVT = Ld->getValueType(0); + EVT MemVT = Ld->getMemoryVT(); + DebugLoc dl = Ld->getDebugLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + ISD::LoadExtType Ext = Ld->getExtensionType(); + + // If this is a vector EXT Load then attempt to optimize it using a + // shuffle. We need SSE4 for the shuffles. + // TODO: It is possible to support ZExt by zeroing the undef values + // during the shuffle phase or after the shuffle. + if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) { + assert(MemVT != RegVT && "Cannot extend to the same type"); + assert(MemVT.isVector() && "Must load a vector from memory"); + + unsigned NumElems = RegVT.getVectorNumElements(); + unsigned RegSz = RegVT.getSizeInBits(); + unsigned MemSz = MemVT.getSizeInBits(); + assert(RegSz > MemSz && "Register size must be greater than the mem size"); + // All sizes must be a power of two + if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue(); + + // Attempt to load the original value using a single load op. + // Find a scalar type which is equal to the loaded word size. + MVT SclrLoadTy = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) { + SclrLoadTy = Tp; + break; + } + } + + // Proceed if a load word is found. + if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue(); + + EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy, + RegSz/SclrLoadTy.getSizeInBits()); + + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + RegSz/MemVT.getScalarType().getSizeInBits()); + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + // Perform a single load. + SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), + Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->getAlignment()); + + // Insert the word loaded into a vector. + SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + LoadUnitVecVT, ScalarLoad); + + // Bitcast the loaded value to a vector of the original element type, in + // the size of the target vector type. + SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector); + unsigned SizeRatio = RegSz/MemSz; + + // Redistribute the loaded elements into the different locations. + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i; + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, + DAG.getUNDEF(SlicedVec.getValueType()), + ShuffleVec.data()); + + // Bitcast to the requested type. + Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + // Replace the original load with the new sequence + // and return the new chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff); + return SDValue(ScalarLoad.getNode(), 1); + } + + return SDValue(); +} + /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
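The EXTLOAD combine in PerformLOADCombine above replaces several narrow loads with one scalar load whose elements are then spread across the wide vector by a shuffle: loaded element i lands at position i * SizeRatio, with the remaining positions left undefined. A scalar model of that redistribution for <4 x i8> extended to <4 x i32>, illustrative only and not patch code:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const std::array<std::uint8_t, 4> memory = {0x11, 0x22, 0x33, 0x44};

      // One scalar load of the whole source value.
      std::uint32_t word = 0;
      for (int i = 0; i < 4; ++i)
        word |= static_cast<std::uint32_t>(memory[i]) << (8 * i);

      // Redistribute the narrow elements across the wide vector:
      // element i goes to slot i * SizeRatio (SizeRatio = 32 / 8 = 4).
      const int SizeRatio = 4;
      std::array<std::uint8_t, 16> wide{};   // models the <16 x i8> shuffle result
      for (int i = 0; i < 4; ++i)
        wide[i * SizeRatio] = static_cast<std::uint8_t>(word >> (8 * i));

      for (unsigned v : wide) std::printf("%02x ", v);   // 11 00 00 00 22 00 ...
      std::printf("\n");
      return 0;
    }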
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { + StoreSDNode *St = cast<StoreSDNode>(N); + EVT VT = St->getValue().getValueType(); + EVT StVT = St->getMemoryVT(); + DebugLoc dl = St->getDebugLoc(); + SDValue StoredVal = St->getOperand(1); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we are saving a concatination of two XMM registers, perform two stores. + // This is better in Sandy Bridge cause one 256-bit mem op is done via two + // 128-bit ones. If in the future the cost becomes only one memory access the + // first version would be better. + if (VT.getSizeInBits() == 256 && + StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && + StoredVal.getNumOperands() == 2) { + + SDValue Value0 = StoredVal.getOperand(0); + SDValue Value1 = StoredVal.getOperand(1); + + SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); + SDValue Ptr0 = St->getBasePtr(); + SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); + + SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); + } + + // Optimize trunc store (of multiple scalars) to shuffle and store. + // First, pack all of the elements in one place. Next, store to memory + // in fewer chunks. + if (St->isTruncatingStore() && VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromSz) % ToSz) return SDValue(); + + unsigned SizeRatio = FromSz / ToSz; + + assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), + StVT.getScalarType(), NumElems*SizeRatio); + + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue()); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. 
+ + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz) + StoreType = Tp; + } + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue Ptr = St->getBasePtr(); + + // Perform one or more big stores into memory. + for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + StoreType, ShuffWide, + DAG.getIntPtrConstant(i)); + SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + Chains.push_back(Ch); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], + Chains.size()); + } + + // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right // places to insert EMMS. This qualifies as a quick hack. // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. - StoreSDNode *St = cast<StoreSDNode>(N); - EVT VT = St->getValue().getValueType(); if (VT.getSizeInBits() != 64) return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps - && Subtarget->hasSSE2(); + && Subtarget->hasXMMInt(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa<LoadSDNode>(St->getValue()) && @@ -12172,6 +13802,150 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal" +/// and return the operands for the horizontal operation in LHS and RHS. A +/// horizontal operation performs the binary operation on successive elements +/// of its first operand, then on successive elements of its second operand, +/// returning the resulting values in a vector. For example, if +/// A = < float a0, float a1, float a2, float a3 > +/// and +/// B = < float b0, float b1, float b2, float b3 > +/// then the result of doing a horizontal operation on A and B is +/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. +/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form +/// A horizontal-op B, for some already available A and B, and if so then LHS is +/// set to A, RHS to B, and the routine returns 'true'. +/// Note that the binary operation should have the property that if one of the +/// operands is UNDEF then the result is UNDEF. 
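isHorizontalBinOp, whose documentation ends above, looks for operands of the form LHS = shuffle(A, B, <0,2,4,6>) and RHS = shuffle(A, B, <1,3,5,7>); when that holds, the element-wise operation on LHS and RHS is exactly the horizontal operation on A and B. A quick illustrative check for the v4f32 add case:

    #include <array>
    #include <cstdio>

    // If LHS takes the even elements of A|B and RHS the odd ones, then the
    // element-wise LHS + RHS equals the horizontal add of A and B.
    int main() {
      std::array<float, 4> A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
      std::array<float, 8> AB = {A[0], A[1], A[2], A[3], B[0], B[1], B[2], B[3]};

      std::array<int, 4> LMask = {0, 2, 4, 6}, RMask = {1, 3, 5, 7};
      for (int i = 0; i < 4; ++i) {
        float lhs = AB[LMask[i]], rhs = AB[RMask[i]];
        std::printf("%g ", lhs + rhs);        // 3 7 30 70 == haddps(A, B)
      }
      std::printf("\n");
      return 0;
    }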
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { + // Look for the following pattern: if + // A = < float a0, float a1, float a2, float a3 > + // B = < float b0, float b1, float b2, float b3 > + // and + // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6> + // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7> + // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 > + // which is A horizontal-op B. + + // At least one of the operands should be a vector shuffle. + if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE && + RHS.getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + EVT VT = LHS.getValueType(); + unsigned N = VT.getVectorNumElements(); + + // View LHS in the form + // LHS = VECTOR_SHUFFLE A, B, LMask + // If LHS is not a shuffle then pretend it is the shuffle + // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> + // NOTE: in what follows a default initialized SDValue represents an UNDEF of + // type VT. + SDValue A, B; + SmallVector<int, 8> LMask(N); + if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { + if (LHS.getOperand(0).getOpcode() != ISD::UNDEF) + A = LHS.getOperand(0); + if (LHS.getOperand(1).getOpcode() != ISD::UNDEF) + B = LHS.getOperand(1); + cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask); + } else { + if (LHS.getOpcode() != ISD::UNDEF) + A = LHS; + for (unsigned i = 0; i != N; ++i) + LMask[i] = i; + } + + // Likewise, view RHS in the form + // RHS = VECTOR_SHUFFLE C, D, RMask + SDValue C, D; + SmallVector<int, 8> RMask(N); + if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { + if (RHS.getOperand(0).getOpcode() != ISD::UNDEF) + C = RHS.getOperand(0); + if (RHS.getOperand(1).getOpcode() != ISD::UNDEF) + D = RHS.getOperand(1); + cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask); + } else { + if (RHS.getOpcode() != ISD::UNDEF) + C = RHS; + for (unsigned i = 0; i != N; ++i) + RMask[i] = i; + } + + // Check that the shuffles are both shuffling the same vectors. + if (!(A == C && B == D) && !(A == D && B == C)) + return false; + + // If everything is UNDEF then bail out: it would be better to fold to UNDEF. + if (!A.getNode() && !B.getNode()) + return false; + + // If A and B occur in reverse order in RHS, then "swap" them (which means + // rewriting the mask). + if (A != C) + for (unsigned i = 0; i != N; ++i) { + unsigned Idx = RMask[i]; + if (Idx < N) + RMask[i] += N; + else if (Idx < 2*N) + RMask[i] -= N; + } + + // At this point LHS and RHS are equivalent to + // LHS = VECTOR_SHUFFLE A, B, LMask + // RHS = VECTOR_SHUFFLE A, B, RMask + // Check that the masks correspond to performing a horizontal operation. + for (unsigned i = 0; i != N; ++i) { + unsigned LIdx = LMask[i], RIdx = RMask[i]; + + // Ignore any UNDEF components. + if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N)) + || (!B.getNode() && (LIdx >= N || RIdx >= N))) + continue; + + // Check that successive elements are being operated on. If not, this is + // not a horizontal operation. + if (!(LIdx == 2*i && RIdx == 2*i + 1) && + !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i)) + return false; + } + + LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it. + RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it. + return true; +} + +/// PerformFADDCombine - Do target-specific dag combines on floating point adds. 
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Try to synthesize horizontal adds from adds of shuffles. + if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && + (VT == MVT::v4f32 || VT == MVT::v2f64) && + isHorizontalBinOp(LHS, RHS, true)) + return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); + return SDValue(); +} + +/// PerformFSUBCombine - Do target-specific dag combines on floating point subs. +static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Try to synthesize horizontal subs from subs of shuffles. + if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && + (VT == MVT::v4f32 || VT == MVT::v2f64) && + isHorizontalBinOp(LHS, RHS, false)) + return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); + return SDValue(); +} + /// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and /// X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { @@ -12326,7 +14100,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, // (add Y, (setne X, 0)) -> sbb -1, Y // (sub (sete X, 0), Y) -> sbb 0, Y // (sub (setne X, 0), Y) -> adc -1, Y -static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { +static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { DebugLoc DL = N->getDebugLoc(); // Look through ZExts. @@ -12362,6 +14136,31 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, OtherVal.getValueType()), NewCmp); } +static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // X86 can't encode an immediate LHS of a sub. See if we can push the + // negation into a preceding instruction. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) { + // If the RHS of the sub is a XOR with one use and a constant, invert the + // immediate. Then add one to the LHS of the sub so we can turn + // X-Y -> X+~Y+1, saving one register. 
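The rewrite described in the comment above relies on the two's-complement identity C - (X ^ C2) == (X ^ ~C2) + (C + 1), which replaces the unencodable immediate-LHS sub with an xor and an add. An illustrative verification over a few values:

    #include <cstdint>
    #include <cstdio>

    // Check the identity behind PerformSubCombine: negating Y = X ^ C2 is
    // ~Y + 1, and ~(X ^ C2) == X ^ ~C2, so C - Y == (X ^ ~C2) + (C + 1).
    int main() {
      const std::uint32_t C = 100, C2 = 0x0F0F0F0F;
      for (std::uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
        std::uint32_t direct    = C - (X ^ C2);
        std::uint32_t rewritten = (X ^ ~C2) + (C + 1);
        std::printf("X=%08x  %08x %08x  %s\n", X, direct, rewritten,
                    direct == rewritten ? "ok" : "MISMATCH");
      }
      return 0;
    }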
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR && + isa<ConstantSDNode>(Op1.getOperand(1))) { + APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue(); + EVT VT = Op0.getValueType(); + SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT, + Op1.getOperand(0), + DAG.getConstant(~XorC, VT)); + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor, + DAG.getConstant(C->getAPIntValue()+1, VT)); + } + } + + return OptimizeConditionalInDecrement(N, DAG); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -12369,10 +14168,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::ADD: - case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG); + case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG); + case ISD::SUB: return PerformSubCombine(N, DAG); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: @@ -12380,8 +14180,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); + case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); + case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); + case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); @@ -12398,14 +14201,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: @@ -12415,7 +14218,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); } return SDValue(); @@ -12551,7 +14359,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "${0:q}")) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. 
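The rewrite performed by PerformSubCombine above is plain two's-complement algebra: C - Z = C + ~Z + 1 and ~(Y ^ K) = Y ^ ~K, so C - (Y ^ K) = (Y ^ ~K) + (C + 1). The constant folds into the ADD immediate and the inverted XOR immediate replaces the old one, so the un-encodable constant LHS of the SUB never needs a register. A minimal standalone check of the identity (not part of the patch; the constants are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      // C - (Y ^ K) == (Y ^ ~K) + (C + 1) holds for all 32-bit values (mod 2^32).
      const uint32_t C = 100, K = 0xF0F0F0F0u;
      for (uint32_t Y = 0; Y != 100000; ++Y)
        assert(C - (Y ^ K) == (Y ^ ~K) + (C + 1));
      return 0;
    }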
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12572,7 +14380,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12603,7 +14411,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12629,7 +14437,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { SplitString(AsmPieces[2], Words, " \t,"); if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && Words[2] == "%edx") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12700,7 +14508,7 @@ TargetLowering::ConstraintWeight // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index b603678..342a5e6 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -175,8 +175,14 @@ namespace llvm { /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, - /// PBLENDVB - Variable blend - PBLENDVB, + /// BLEND family of opcodes + BLENDV, + + /// FHADD - Floating point horizontal add. + FHADD, + + /// FHSUB - Floating point horizontal sub. + FHSUB, /// FMAX, FMIN - Floating point max and min. /// @@ -222,6 +228,8 @@ namespace llvm { ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, + ANDN, // ANDN - Bitwise AND NOT with FLAGS results. + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. @@ -257,12 +265,12 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPS, - VUNPCKLPD, VUNPCKLPSY, VUNPCKLPDY, UNPCKHPS, UNPCKHPD, + VUNPCKHPSY, + VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, @@ -271,6 +279,12 @@ namespace llvm { PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPERMILPS, + VPERMILPSY, + VPERMILPD, + VPERMILPDY, + VPERM2F128, + VBROADCAST, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded @@ -280,6 +294,11 @@ namespace llvm { // WIN_ALLOCA - Windows's _chkstk call to do stack probing. WIN_ALLOCA, + // SEG_ALLOCA - For allocating variable amounts of stack space when using + // segmented stacks. Check if the current stacklet has enough space, and + // falls back to heap allocation if not. + SEG_ALLOCA, + // Memory barrier MEMBARRIER, MFENCE, @@ -297,9 +316,10 @@ namespace llvm { ATOMNAND64_DAG, ATOMSWAP64_DAG, - // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. 
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. LCMPXCHG_DAG, LCMPXCHG8_DAG, + LCMPXCHG16_DAG, // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. VZEXT_LOAD, @@ -407,20 +427,16 @@ namespace llvm { /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(ShuffleVectorSDNode *N); + bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(ShuffleVectorSDNode *N); + bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. bool isMOVDDUPMask(ShuffleVectorSDNode *N); - /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to PALIGNR. - bool isPALIGNRMask(ShuffleVectorSDNode *N); - /// isVEXTRACTF128Index - Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for input to VEXTRACTF128. @@ -505,7 +521,7 @@ namespace llvm { /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at /// 4-byte boundaries. - virtual unsigned getByValTypeAlignment(const Type *Ty) const; + virtual unsigned getByValTypeAlignment(Type *Ty) const; /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -564,8 +580,8 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + /// getSetCCResultType - Return the value type to use for ISD::SETCC. + virtual EVT getSetCCResultType(EVT VT) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -617,12 +633,12 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; /// isTruncateFree - Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. - virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a @@ -633,7 +649,7 @@ namespace llvm { /// does not necessarily apply to truncate instructions. e.g. on x86-64, /// all instructions that define 32-bit values implicit zero-extend the /// result out to 64 bits. 
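The isTruncateFree/isZExtFree hooks here encode the sub-register behaviour described in the surrounding comments. A standalone illustration (not part of the patch; the assembly shown in comments is typical compiler output and may vary):

    #include <cstdint>

    // i32 -> i16 truncation: the low 16 bits of a 32-bit register are simply
    // referenced as the AX/DI/... sub-register, so no extra instruction is needed.
    uint16_t trunc32to16(uint32_t x) { return static_cast<uint16_t>(x); }
    // typically: movl %edi, %eax ; ret

    // i32 -> i64 zero-extension: writing any 32-bit register implicitly clears
    // the upper 32 bits on x86-64, so the extension is free as well.
    uint64_t zext32to64(uint32_t x) { return x; }
    // typically: movl %edi, %eax ; ret   (the 32-bit mov zero-extends into %rax)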
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; /// isNarrowingProfitable - Return true if it's profitable to narrow @@ -813,11 +829,14 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; @@ -825,6 +844,7 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE @@ -931,6 +951,10 @@ namespace llvm { MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + bool Is64Bit) const; + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td index 9f7a4b0..74b647a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -650,6 +650,15 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> let isCodeGenOnly = 1; } +// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). +class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> + : ITy<opcode, MRMSrcReg, typeinfo, (outs), + (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", []> { + // The disassembler should know about this, but not the asmparser. + let isCodeGenOnly = 1; +} + // BinOpRM - Instructions like "add reg, reg, [mem]". class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> @@ -857,11 +866,10 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, // BinOpAI - Instructions like "add %eax, %eax, imm". 
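For context on the BinOpRR_F_Rev class added above: most x86 reg/reg arithmetic instructions have two equivalent encodings, one with the destination in the ModRM r/m field and one with it in the reg field. For example (standard opcode-map values, not taken from this patch), `addl %ecx, %eax` can be encoded either as 01 C8 (ADD r/m32, r32) or as 03 C1 (ADD r32, r/m32); the normal patterns emit one form, and the isCodeGenOnly *_Rev definitions describe the other so the disassembler can still decode it.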
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg> + Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, !strconcat("{$src, %", areg.AsmName, "|%", - areg.AsmName, ", $src}"), []> { + mnemonic, operands, []> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -926,10 +934,14 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -993,10 +1005,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -1017,10 +1033,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; - def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; - def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; - def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; @@ -1056,10 +1072,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : 
BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -1117,9 +1137,37 @@ let Defs = [EFLAGS] in { def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; - def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>; - def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>; - def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>; - def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>; -} + def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, + "{$src, %al|AL, $src}">; + def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, + "{$src, %ax|AX, $src}">; + def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX, + "{$src, %rax|RAX, $src}">; + + // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the + // register class is constrained to GR8_NOREX. + let isPseudo = 1 in + def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), + "", []>; +} +//===----------------------------------------------------------------------===// +// ANDN Instruction +// +multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, + PatFrag ld_frag> { + def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>; + def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, + (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>; +} + +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td index adcc747..da28690 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td @@ -106,6 +106,26 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def WIN_ALLOCA : I<0, Pseudo, (outs), (ins), "# dynamic stack allocation", [(X86WinAlloca)]>; + +// When using segmented stacks these are lowered into instructions which first +// check if the current stacklet has enough free memory. If it does, memory is +// allocated by bumping the stack pointer. Otherwise memory is allocated from +// the heap. + +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in +def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), + "# variable sized alloca for segmented stacks", + [(set GR32:$dst, + (X86SegAlloca GR32:$size))]>, + Requires<[In32BitMode]>; + +let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in +def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), + "# variable sized alloca for segmented stacks", + [(set GR64:$dst, + (X86SegAlloca GR64:$size))]>, + Requires<[In64BitMode]>; + } @@ -329,18 +349,11 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions -let Constraints = "$src1 = $dst" in { - -// Conditional moves -let Uses = [EFLAGS] in { - // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. 
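The segmented-stack lowering described in the comment above (driven by the SEG_ALLOCA pseudos) amounts to a compare-and-branch against the current stacklet's limit. A rough standalone sketch, not part of the patch and using invented helper names (stacklet_limit, morestack_allocate) for the real thread-local limit and runtime fallback:

    #include <cstddef>
    #include <cstdint>

    // Assumed helpers standing in for the real stacklet bound and runtime call.
    extern "C" uintptr_t stacklet_limit();             // lower bound of current stacklet
    extern "C" void *morestack_allocate(size_t size);  // heap fallback

    void *seg_alloca(uintptr_t sp, size_t size) {
      // Enough room in the current stacklet: allocate by bumping the stack pointer.
      if (sp - size >= stacklet_limit())
        return reinterpret_cast<void *>(sp - size);
      // Otherwise fall back to a heap allocation.
      return morestack_allocate(size);
    }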
An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional -// i8 register pressure. Note that CMOV_GR8 is conservatively considered to -// clobber EFLAGS, because if one of the operands is zero, the expansion -// could involve an xor. -let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { +// i8 register pressure. +let usesCustomInserter = 1, Uses = [EFLAGS] in { def CMOV_GR8 : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), "#CMOV_GR8 PSEUDO!", @@ -380,10 +393,7 @@ def CMOV_RFP80 : I<0, Pseudo, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, EFLAGS))]>; } // Predicates = [NoCMov] -} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] -} // Uses = [EFLAGS] - -} // Constraints = "$src1 = $dst" in +} // UsesCustomInserter = 1, Uses = [EFLAGS] //===----------------------------------------------------------------------===// @@ -532,7 +542,7 @@ def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", - [(X86MemBarrier)]>, Requires<[HasSSE2]>; + [(X86MemBarrier)]>; // TODO: Get this to fold the constant into the instruction. let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in @@ -630,8 +640,8 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; -defm LOCK_AND : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM4m, "and">; -defm LOCK_XOR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM6m, "xor">; +defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; +defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { @@ -665,12 +675,20 @@ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), // Atomic compare and swap. 
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - isCodeGenOnly = 1 in { + isCodeGenOnly = 1 in def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; -} + +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], + isCodeGenOnly = 1 in +def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), + "lock\n\t" + "cmpxchg16b\t$ptr", + [(X86cas16 addr:$ptr)]>, TB, LOCK, + Requires<[HasCmpxchg16b]>; + let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), "lock\n\t" @@ -695,7 +713,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" - "cmpxchgq\t$swap,$ptr", + "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; } @@ -718,11 +736,37 @@ def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), TB, LOCK; def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), "lock\n\t" - "xadd\t$val, $ptr", + "xadd{q}\t{$val, $ptr|$ptr, $val}", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; } +def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR8:$dst, (atomic_load_8 addr:$src))]>; +def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR16:$dst, (atomic_load_16 addr:$src))]>; +def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR32:$dst, (atomic_load_32 addr:$src))]>; +def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR64:$dst, (atomic_load_64 addr:$src))]>; + +def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_8 addr:$dst, GR8 :$src)]>; +def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_16 addr:$dst, GR16:$src)]>; +def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_32 addr:$dst, GR32:$src)]>; +def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_64 addr:$dst, GR64:$src)]>; + //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions. 
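A note on the ACQUIRE_MOV/RELEASE_MOV pseudos added above: on x86 an ordinary aligned load already has acquire semantics and an ordinary aligned store already has release semantics, which is presumably why plain MOV-shaped pseudos suffice for atomic_load/atomic_store here; only sequentially-consistent stores need extra fencing (an XCHG or a trailing MFENCE). A small user-level illustration, not part of the patch:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> flag{0};

    // On x86-64 both of these typically compile to a plain MOV.
    uint32_t load_acquire()        { return flag.load(std::memory_order_acquire); }
    void store_release(uint32_t v) { flag.store(v, std::memory_order_release); }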
//===----------------------------------------------------------------------===// @@ -759,6 +803,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { [(set VR128:$dst, (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)))]>; + def CMOV_V8F32 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V8F32 PSEUDO!", + [(set VR256:$dst, + (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V4F64 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V4F64 PSEUDO!", + [(set VR256:$dst, + (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V4I64 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V4I64 PSEUDO!", + [(set VR256:$dst, + (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; } diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td index 2e1d523..e62e6b7 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td +++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td @@ -76,12 +76,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, - (outs GR32_NOREX:$dst), (ins GR8:$src), + (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, - (outs GR32_NOREX:$dst), (ins i8mem:$src), + (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", []>, TB; diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 6d89bcc..0a1590b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -113,6 +113,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } +class VEX_LIG { bit ignoresVEX_L = 1; } class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, @@ -150,6 +151,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register // to be encoded in a immediate field? bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? + bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? // TSFlags layout should be kept in sync with X86InstrInfo.h. 
@@ -169,7 +171,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{35} = hasVEX_4VPrefix; let TSFlags{36} = hasVEX_i8ImmReg; let TSFlags{37} = hasVEX_L; - let TSFlags{38} = has3DNow0F0FOpcode; + let TSFlags{38} = ignoresVEX_L; + let TSFlags{39} = has3DNow0F0FOpcode; } class PseudoI<dag oops, dag iops, list<dag> pattern> @@ -501,6 +504,9 @@ class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : PDI<o, F, outs, ins, asm, pattern>, REX_W; +class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : VPDI<o, F, outs, ins, asm, pattern>, VEX_W; // MMX Instruction templates // diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b00109c..af919fb 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -39,6 +39,8 @@ def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; +def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; +def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; @@ -49,18 +51,15 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignb : SDNode<"X86ISD::PSIGNB", +def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignw : SDNode<"X86ISD::PSIGNW", +def X86psignw : SDNode<"X86ISD::PSIGNW", SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignd : SDNode<"X86ISD::PSIGND", +def X86psignd : SDNode<"X86ISD::PSIGND", SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pblendv : SDNode<"X86ISD::PBLENDVB", - SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", @@ -109,6 +108,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; + def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; @@ -133,12 +134,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; +def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; + +def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", 
SDTShuff2Op>; +def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; +def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; +def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; @@ -150,6 +154,15 @@ def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; +def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; +def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; +def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>; + +def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; + +def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -193,17 +206,28 @@ def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; -// Like 'store', but always requires vector alignment. +// Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast<StoreSDNode>(N)->getAlignment() >= 16; }]>; -// Like 'load', but always requires vector alignment. +// Like 'store', but always requires 256-bit vector alignment. +def alignedstore256 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 32; +}]>; + +// Like 'load', but always requires 128-bit vector alignment. def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; +// Like 'load', but always requires 256-bit vector alignment. +def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 32; +}]>; + def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), @@ -221,13 +245,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // 256-bit aligned load pattern fragments def alignedloadv8f32 : PatFrag<(ops node:$ptr), - (v8f32 (alignedload node:$ptr))>; + (v8f32 (alignedload256 node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), - (v4f64 (alignedload node:$ptr))>; + (v4f64 (alignedload256 node:$ptr))>; def alignedloadv8i32 : PatFrag<(ops node:$ptr), - (v8i32 (alignedload node:$ptr))>; + (v8i32 (alignedload256 node:$ptr))>; def alignedloadv4i64 : PatFrag<(ops node:$ptr), - (v4i64 (alignedload node:$ptr))>; + (v4i64 (alignedload256 node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to @@ -356,7 +380,7 @@ def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N)); }]>; -// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to +// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to // VINSERTF128 imm. 
def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{ return getI8Imm(X86::getInsertVINSERTF128Immediate(N)); @@ -398,16 +422,6 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); }]>; -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); @@ -418,16 +432,6 @@ def unpckh : PatFrag<(ops node:$lhs, node:$rhs), return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); }]>; -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - def pshufd : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); @@ -448,11 +452,6 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_pshuflw_imm>; -def palign : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_palign_imm>; - def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index), (extract_subvector node:$bigvec, node:$index), [{ @@ -465,3 +464,4 @@ def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec, node:$index), [{ return X86::isVINSERTF128Index(N); }], INSERT_get_vinsertf128_imm>; + diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 55b5835..3a02de0 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -53,6 +53,36 @@ ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden); +enum { + // Select which memory operand is being unfolded. + // (stored in bits 0 - 7) + TB_INDEX_0 = 0, + TB_INDEX_1 = 1, + TB_INDEX_2 = 2, + TB_INDEX_MASK = 0xff, + + // Minimum alignment required for load/store. + // Used for RegOp->MemOp conversion. + // (stored in bits 8 - 15) + TB_ALIGN_SHIFT = 8, + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT, + + // Do not insert the reverse map (MemOp -> RegOp) into the table. + // This may be needed because there is a many -> one mapping. + TB_NO_REVERSE = 1 << 16, + + // Do not insert the forward map (RegOp -> MemOp) into the table. + // This is needed for Native Client, which prohibits branch + // instructions from using a memory operand. + TB_NO_FORWARD = 1 << 17, + + TB_FOLDED_LOAD = 1 << 18, + TB_FOLDED_STORE = 1 << 19 +}; + X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() ? X86::ADJCALLSTACKDOWN64 @@ -61,655 +91,829 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) ? 
X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { - enum { - TB_NOT_REVERSABLE = 1U << 31, - TB_FLAGS = TB_NOT_REVERSABLE - }; - static const unsigned OpTbl2Addr[][2] = { - { X86::ADC32ri, X86::ADC32mi }, - { X86::ADC32ri8, X86::ADC32mi8 }, - { X86::ADC32rr, X86::ADC32mr }, - { X86::ADC64ri32, X86::ADC64mi32 }, - { X86::ADC64ri8, X86::ADC64mi8 }, - { X86::ADC64rr, X86::ADC64mr }, - { X86::ADD16ri, X86::ADD16mi }, - { X86::ADD16ri8, X86::ADD16mi8 }, - { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE }, - { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE }, - { X86::ADD16rr, X86::ADD16mr }, - { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE }, - { X86::ADD32ri, X86::ADD32mi }, - { X86::ADD32ri8, X86::ADD32mi8 }, - { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE }, - { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE }, - { X86::ADD32rr, X86::ADD32mr }, - { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE }, - { X86::ADD64ri32, X86::ADD64mi32 }, - { X86::ADD64ri8, X86::ADD64mi8 }, - { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE }, - { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE }, - { X86::ADD64rr, X86::ADD64mr }, - { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE }, - { X86::ADD8ri, X86::ADD8mi }, - { X86::ADD8rr, X86::ADD8mr }, - { X86::AND16ri, X86::AND16mi }, - { X86::AND16ri8, X86::AND16mi8 }, - { X86::AND16rr, X86::AND16mr }, - { X86::AND32ri, X86::AND32mi }, - { X86::AND32ri8, X86::AND32mi8 }, - { X86::AND32rr, X86::AND32mr }, - { X86::AND64ri32, X86::AND64mi32 }, - { X86::AND64ri8, X86::AND64mi8 }, - { X86::AND64rr, X86::AND64mr }, - { X86::AND8ri, X86::AND8mi }, - { X86::AND8rr, X86::AND8mr }, - { X86::DEC16r, X86::DEC16m }, - { X86::DEC32r, X86::DEC32m }, - { X86::DEC64_16r, X86::DEC64_16m }, - { X86::DEC64_32r, X86::DEC64_32m }, - { X86::DEC64r, X86::DEC64m }, - { X86::DEC8r, X86::DEC8m }, - { X86::INC16r, X86::INC16m }, - { X86::INC32r, X86::INC32m }, - { X86::INC64_16r, X86::INC64_16m }, - { X86::INC64_32r, X86::INC64_32m }, - { X86::INC64r, X86::INC64m }, - { X86::INC8r, X86::INC8m }, - { X86::NEG16r, X86::NEG16m }, - { X86::NEG32r, X86::NEG32m }, - { X86::NEG64r, X86::NEG64m }, - { X86::NEG8r, X86::NEG8m }, - { X86::NOT16r, X86::NOT16m }, - { X86::NOT32r, X86::NOT32m }, - { X86::NOT64r, X86::NOT64m }, - { X86::NOT8r, X86::NOT8m }, - { X86::OR16ri, X86::OR16mi }, - { X86::OR16ri8, X86::OR16mi8 }, - { X86::OR16rr, X86::OR16mr }, - { X86::OR32ri, X86::OR32mi }, - { X86::OR32ri8, X86::OR32mi8 }, - { X86::OR32rr, X86::OR32mr }, - { X86::OR64ri32, X86::OR64mi32 }, - { X86::OR64ri8, X86::OR64mi8 }, - { X86::OR64rr, X86::OR64mr }, - { X86::OR8ri, X86::OR8mi }, - { X86::OR8rr, X86::OR8mr }, - { X86::ROL16r1, X86::ROL16m1 }, - { X86::ROL16rCL, X86::ROL16mCL }, - { X86::ROL16ri, X86::ROL16mi }, - { X86::ROL32r1, X86::ROL32m1 }, - { X86::ROL32rCL, X86::ROL32mCL }, - { X86::ROL32ri, X86::ROL32mi }, - { X86::ROL64r1, X86::ROL64m1 }, - { X86::ROL64rCL, X86::ROL64mCL }, - { X86::ROL64ri, X86::ROL64mi }, - { X86::ROL8r1, X86::ROL8m1 }, - { X86::ROL8rCL, X86::ROL8mCL }, - { X86::ROL8ri, X86::ROL8mi }, - { X86::ROR16r1, X86::ROR16m1 }, - { X86::ROR16rCL, X86::ROR16mCL }, - { X86::ROR16ri, X86::ROR16mi }, - { X86::ROR32r1, X86::ROR32m1 }, - { X86::ROR32rCL, X86::ROR32mCL }, - { X86::ROR32ri, X86::ROR32mi }, - { X86::ROR64r1, X86::ROR64m1 }, - { X86::ROR64rCL, X86::ROR64mCL }, - { X86::ROR64ri, X86::ROR64mi }, - { X86::ROR8r1, X86::ROR8m1 }, - { X86::ROR8rCL, X86::ROR8mCL }, - { X86::ROR8ri, X86::ROR8mi }, - { X86::SAR16r1, X86::SAR16m1 
}, - { X86::SAR16rCL, X86::SAR16mCL }, - { X86::SAR16ri, X86::SAR16mi }, - { X86::SAR32r1, X86::SAR32m1 }, - { X86::SAR32rCL, X86::SAR32mCL }, - { X86::SAR32ri, X86::SAR32mi }, - { X86::SAR64r1, X86::SAR64m1 }, - { X86::SAR64rCL, X86::SAR64mCL }, - { X86::SAR64ri, X86::SAR64mi }, - { X86::SAR8r1, X86::SAR8m1 }, - { X86::SAR8rCL, X86::SAR8mCL }, - { X86::SAR8ri, X86::SAR8mi }, - { X86::SBB32ri, X86::SBB32mi }, - { X86::SBB32ri8, X86::SBB32mi8 }, - { X86::SBB32rr, X86::SBB32mr }, - { X86::SBB64ri32, X86::SBB64mi32 }, - { X86::SBB64ri8, X86::SBB64mi8 }, - { X86::SBB64rr, X86::SBB64mr }, - { X86::SHL16rCL, X86::SHL16mCL }, - { X86::SHL16ri, X86::SHL16mi }, - { X86::SHL32rCL, X86::SHL32mCL }, - { X86::SHL32ri, X86::SHL32mi }, - { X86::SHL64rCL, X86::SHL64mCL }, - { X86::SHL64ri, X86::SHL64mi }, - { X86::SHL8rCL, X86::SHL8mCL }, - { X86::SHL8ri, X86::SHL8mi }, - { X86::SHLD16rrCL, X86::SHLD16mrCL }, - { X86::SHLD16rri8, X86::SHLD16mri8 }, - { X86::SHLD32rrCL, X86::SHLD32mrCL }, - { X86::SHLD32rri8, X86::SHLD32mri8 }, - { X86::SHLD64rrCL, X86::SHLD64mrCL }, - { X86::SHLD64rri8, X86::SHLD64mri8 }, - { X86::SHR16r1, X86::SHR16m1 }, - { X86::SHR16rCL, X86::SHR16mCL }, - { X86::SHR16ri, X86::SHR16mi }, - { X86::SHR32r1, X86::SHR32m1 }, - { X86::SHR32rCL, X86::SHR32mCL }, - { X86::SHR32ri, X86::SHR32mi }, - { X86::SHR64r1, X86::SHR64m1 }, - { X86::SHR64rCL, X86::SHR64mCL }, - { X86::SHR64ri, X86::SHR64mi }, - { X86::SHR8r1, X86::SHR8m1 }, - { X86::SHR8rCL, X86::SHR8mCL }, - { X86::SHR8ri, X86::SHR8mi }, - { X86::SHRD16rrCL, X86::SHRD16mrCL }, - { X86::SHRD16rri8, X86::SHRD16mri8 }, - { X86::SHRD32rrCL, X86::SHRD32mrCL }, - { X86::SHRD32rri8, X86::SHRD32mri8 }, - { X86::SHRD64rrCL, X86::SHRD64mrCL }, - { X86::SHRD64rri8, X86::SHRD64mri8 }, - { X86::SUB16ri, X86::SUB16mi }, - { X86::SUB16ri8, X86::SUB16mi8 }, - { X86::SUB16rr, X86::SUB16mr }, - { X86::SUB32ri, X86::SUB32mi }, - { X86::SUB32ri8, X86::SUB32mi8 }, - { X86::SUB32rr, X86::SUB32mr }, - { X86::SUB64ri32, X86::SUB64mi32 }, - { X86::SUB64ri8, X86::SUB64mi8 }, - { X86::SUB64rr, X86::SUB64mr }, - { X86::SUB8ri, X86::SUB8mi }, - { X86::SUB8rr, X86::SUB8mr }, - { X86::XOR16ri, X86::XOR16mi }, - { X86::XOR16ri8, X86::XOR16mi8 }, - { X86::XOR16rr, X86::XOR16mr }, - { X86::XOR32ri, X86::XOR32mi }, - { X86::XOR32ri8, X86::XOR32mi8 }, - { X86::XOR32rr, X86::XOR32mr }, - { X86::XOR64ri32, X86::XOR64mi32 }, - { X86::XOR64ri8, X86::XOR64mi8 }, - { X86::XOR64rr, X86::XOR64mr }, - { X86::XOR8ri, X86::XOR8mi }, - { X86::XOR8rr, X86::XOR8mr } + static const unsigned OpTbl2Addr[][3] = { + { X86::ADC32ri, X86::ADC32mi, 0 }, + { X86::ADC32ri8, X86::ADC32mi8, 0 }, + { X86::ADC32rr, X86::ADC32mr, 0 }, + { X86::ADC64ri32, X86::ADC64mi32, 0 }, + { X86::ADC64ri8, X86::ADC64mi8, 0 }, + { X86::ADC64rr, X86::ADC64mr, 0 }, + { X86::ADD16ri, X86::ADD16mi, 0 }, + { X86::ADD16ri8, X86::ADD16mi8, 0 }, + { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, + { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, + { X86::ADD16rr, X86::ADD16mr, 0 }, + { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, + { X86::ADD32ri, X86::ADD32mi, 0 }, + { X86::ADD32ri8, X86::ADD32mi8, 0 }, + { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, + { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32mr, 0 }, + { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, + { X86::ADD64ri32, X86::ADD64mi32, 0 }, + { X86::ADD64ri8, X86::ADD64mi8, 0 }, + { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, + { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64mr, 0 
}, + { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, + { X86::ADD8ri, X86::ADD8mi, 0 }, + { X86::ADD8rr, X86::ADD8mr, 0 }, + { X86::AND16ri, X86::AND16mi, 0 }, + { X86::AND16ri8, X86::AND16mi8, 0 }, + { X86::AND16rr, X86::AND16mr, 0 }, + { X86::AND32ri, X86::AND32mi, 0 }, + { X86::AND32ri8, X86::AND32mi8, 0 }, + { X86::AND32rr, X86::AND32mr, 0 }, + { X86::AND64ri32, X86::AND64mi32, 0 }, + { X86::AND64ri8, X86::AND64mi8, 0 }, + { X86::AND64rr, X86::AND64mr, 0 }, + { X86::AND8ri, X86::AND8mi, 0 }, + { X86::AND8rr, X86::AND8mr, 0 }, + { X86::DEC16r, X86::DEC16m, 0 }, + { X86::DEC32r, X86::DEC32m, 0 }, + { X86::DEC64_16r, X86::DEC64_16m, 0 }, + { X86::DEC64_32r, X86::DEC64_32m, 0 }, + { X86::DEC64r, X86::DEC64m, 0 }, + { X86::DEC8r, X86::DEC8m, 0 }, + { X86::INC16r, X86::INC16m, 0 }, + { X86::INC32r, X86::INC32m, 0 }, + { X86::INC64_16r, X86::INC64_16m, 0 }, + { X86::INC64_32r, X86::INC64_32m, 0 }, + { X86::INC64r, X86::INC64m, 0 }, + { X86::INC8r, X86::INC8m, 0 }, + { X86::NEG16r, X86::NEG16m, 0 }, + { X86::NEG32r, X86::NEG32m, 0 }, + { X86::NEG64r, X86::NEG64m, 0 }, + { X86::NEG8r, X86::NEG8m, 0 }, + { X86::NOT16r, X86::NOT16m, 0 }, + { X86::NOT32r, X86::NOT32m, 0 }, + { X86::NOT64r, X86::NOT64m, 0 }, + { X86::NOT8r, X86::NOT8m, 0 }, + { X86::OR16ri, X86::OR16mi, 0 }, + { X86::OR16ri8, X86::OR16mi8, 0 }, + { X86::OR16rr, X86::OR16mr, 0 }, + { X86::OR32ri, X86::OR32mi, 0 }, + { X86::OR32ri8, X86::OR32mi8, 0 }, + { X86::OR32rr, X86::OR32mr, 0 }, + { X86::OR64ri32, X86::OR64mi32, 0 }, + { X86::OR64ri8, X86::OR64mi8, 0 }, + { X86::OR64rr, X86::OR64mr, 0 }, + { X86::OR8ri, X86::OR8mi, 0 }, + { X86::OR8rr, X86::OR8mr, 0 }, + { X86::ROL16r1, X86::ROL16m1, 0 }, + { X86::ROL16rCL, X86::ROL16mCL, 0 }, + { X86::ROL16ri, X86::ROL16mi, 0 }, + { X86::ROL32r1, X86::ROL32m1, 0 }, + { X86::ROL32rCL, X86::ROL32mCL, 0 }, + { X86::ROL32ri, X86::ROL32mi, 0 }, + { X86::ROL64r1, X86::ROL64m1, 0 }, + { X86::ROL64rCL, X86::ROL64mCL, 0 }, + { X86::ROL64ri, X86::ROL64mi, 0 }, + { X86::ROL8r1, X86::ROL8m1, 0 }, + { X86::ROL8rCL, X86::ROL8mCL, 0 }, + { X86::ROL8ri, X86::ROL8mi, 0 }, + { X86::ROR16r1, X86::ROR16m1, 0 }, + { X86::ROR16rCL, X86::ROR16mCL, 0 }, + { X86::ROR16ri, X86::ROR16mi, 0 }, + { X86::ROR32r1, X86::ROR32m1, 0 }, + { X86::ROR32rCL, X86::ROR32mCL, 0 }, + { X86::ROR32ri, X86::ROR32mi, 0 }, + { X86::ROR64r1, X86::ROR64m1, 0 }, + { X86::ROR64rCL, X86::ROR64mCL, 0 }, + { X86::ROR64ri, X86::ROR64mi, 0 }, + { X86::ROR8r1, X86::ROR8m1, 0 }, + { X86::ROR8rCL, X86::ROR8mCL, 0 }, + { X86::ROR8ri, X86::ROR8mi, 0 }, + { X86::SAR16r1, X86::SAR16m1, 0 }, + { X86::SAR16rCL, X86::SAR16mCL, 0 }, + { X86::SAR16ri, X86::SAR16mi, 0 }, + { X86::SAR32r1, X86::SAR32m1, 0 }, + { X86::SAR32rCL, X86::SAR32mCL, 0 }, + { X86::SAR32ri, X86::SAR32mi, 0 }, + { X86::SAR64r1, X86::SAR64m1, 0 }, + { X86::SAR64rCL, X86::SAR64mCL, 0 }, + { X86::SAR64ri, X86::SAR64mi, 0 }, + { X86::SAR8r1, X86::SAR8m1, 0 }, + { X86::SAR8rCL, X86::SAR8mCL, 0 }, + { X86::SAR8ri, X86::SAR8mi, 0 }, + { X86::SBB32ri, X86::SBB32mi, 0 }, + { X86::SBB32ri8, X86::SBB32mi8, 0 }, + { X86::SBB32rr, X86::SBB32mr, 0 }, + { X86::SBB64ri32, X86::SBB64mi32, 0 }, + { X86::SBB64ri8, X86::SBB64mi8, 0 }, + { X86::SBB64rr, X86::SBB64mr, 0 }, + { X86::SHL16rCL, X86::SHL16mCL, 0 }, + { X86::SHL16ri, X86::SHL16mi, 0 }, + { X86::SHL32rCL, X86::SHL32mCL, 0 }, + { X86::SHL32ri, X86::SHL32mi, 0 }, + { X86::SHL64rCL, X86::SHL64mCL, 0 }, + { X86::SHL64ri, X86::SHL64mi, 0 }, + { X86::SHL8rCL, X86::SHL8mCL, 0 }, + { X86::SHL8ri, X86::SHL8mi, 0 }, + { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 
}, + { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, + { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, + { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, + { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, + { X86::SHR16r1, X86::SHR16m1, 0 }, + { X86::SHR16rCL, X86::SHR16mCL, 0 }, + { X86::SHR16ri, X86::SHR16mi, 0 }, + { X86::SHR32r1, X86::SHR32m1, 0 }, + { X86::SHR32rCL, X86::SHR32mCL, 0 }, + { X86::SHR32ri, X86::SHR32mi, 0 }, + { X86::SHR64r1, X86::SHR64m1, 0 }, + { X86::SHR64rCL, X86::SHR64mCL, 0 }, + { X86::SHR64ri, X86::SHR64mi, 0 }, + { X86::SHR8r1, X86::SHR8m1, 0 }, + { X86::SHR8rCL, X86::SHR8mCL, 0 }, + { X86::SHR8ri, X86::SHR8mi, 0 }, + { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, + { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, + { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, + { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, + { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, + { X86::SUB16ri, X86::SUB16mi, 0 }, + { X86::SUB16ri8, X86::SUB16mi8, 0 }, + { X86::SUB16rr, X86::SUB16mr, 0 }, + { X86::SUB32ri, X86::SUB32mi, 0 }, + { X86::SUB32ri8, X86::SUB32mi8, 0 }, + { X86::SUB32rr, X86::SUB32mr, 0 }, + { X86::SUB64ri32, X86::SUB64mi32, 0 }, + { X86::SUB64ri8, X86::SUB64mi8, 0 }, + { X86::SUB64rr, X86::SUB64mr, 0 }, + { X86::SUB8ri, X86::SUB8mi, 0 }, + { X86::SUB8rr, X86::SUB8mr, 0 }, + { X86::XOR16ri, X86::XOR16mi, 0 }, + { X86::XOR16ri8, X86::XOR16mi8, 0 }, + { X86::XOR16rr, X86::XOR16mr, 0 }, + { X86::XOR32ri, X86::XOR32mi, 0 }, + { X86::XOR32ri8, X86::XOR32mi8, 0 }, + { X86::XOR32rr, X86::XOR32mr, 0 }, + { X86::XOR64ri32, X86::XOR64mi32, 0 }, + { X86::XOR64ri8, X86::XOR64mi8, 0 }, + { X86::XOR64rr, X86::XOR64mr, 0 }, + { X86::XOR8ri, X86::XOR8mi, 0 }, + { X86::XOR8rr, X86::XOR8mr, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS; - assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); - RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 0, folded load and store, no alignment requirement. - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); - - assert(!MemOp2RegOpTable.count(MemOp) && - "Duplicated entries in unfolding maps?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); - } - - // If the third value is 1, then it's folding either a load or a store. 
- static const unsigned OpTbl0[][4] = { - { X86::BT16ri8, X86::BT16mi8, 1, 0 }, - { X86::BT32ri8, X86::BT32mi8, 1, 0 }, - { X86::BT64ri8, X86::BT64mi8, 1, 0 }, - { X86::CALL32r, X86::CALL32m, 1, 0 }, - { X86::CALL64r, X86::CALL64m, 1, 0 }, - { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, - { X86::CMP16ri, X86::CMP16mi, 1, 0 }, - { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, - { X86::CMP16rr, X86::CMP16mr, 1, 0 }, - { X86::CMP32ri, X86::CMP32mi, 1, 0 }, - { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, - { X86::CMP32rr, X86::CMP32mr, 1, 0 }, - { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, - { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, - { X86::CMP64rr, X86::CMP64mr, 1, 0 }, - { X86::CMP8ri, X86::CMP8mi, 1, 0 }, - { X86::CMP8rr, X86::CMP8mr, 1, 0 }, - { X86::DIV16r, X86::DIV16m, 1, 0 }, - { X86::DIV32r, X86::DIV32m, 1, 0 }, - { X86::DIV64r, X86::DIV64m, 1, 0 }, - { X86::DIV8r, X86::DIV8m, 1, 0 }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, - { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1, 0 }, - { X86::IDIV32r, X86::IDIV32m, 1, 0 }, - { X86::IDIV64r, X86::IDIV64m, 1, 0 }, - { X86::IDIV8r, X86::IDIV8m, 1, 0 }, - { X86::IMUL16r, X86::IMUL16m, 1, 0 }, - { X86::IMUL32r, X86::IMUL32m, 1, 0 }, - { X86::IMUL64r, X86::IMUL64m, 1, 0 }, - { X86::IMUL8r, X86::IMUL8m, 1, 0 }, - { X86::JMP32r, X86::JMP32m, 1, 0 }, - { X86::JMP64r, X86::JMP64m, 1, 0 }, - { X86::MOV16ri, X86::MOV16mi, 0, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0, 0 }, - { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, - { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, - { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 }, - { X86::MUL16r, X86::MUL16m, 1, 0 }, - { X86::MUL32r, X86::MUL32m, 1, 0 }, - { X86::MUL64r, X86::MUL64m, 1, 0 }, - { X86::MUL8r, X86::MUL8m, 1, 0 }, - { X86::SETAEr, X86::SETAEm, 0, 0 }, - { X86::SETAr, X86::SETAm, 0, 0 }, - { X86::SETBEr, X86::SETBEm, 0, 0 }, - { X86::SETBr, X86::SETBm, 0, 0 }, - { X86::SETEr, X86::SETEm, 0, 0 }, - { X86::SETGEr, X86::SETGEm, 0, 0 }, - { X86::SETGr, X86::SETGm, 0, 0 }, - { X86::SETLEr, X86::SETLEm, 0, 0 }, - { X86::SETLr, X86::SETLm, 0, 0 }, - { X86::SETNEr, X86::SETNEm, 0, 0 }, - { X86::SETNOr, X86::SETNOm, 0, 0 }, - { X86::SETNPr, X86::SETNPm, 0, 0 }, - { X86::SETNSr, X86::SETNSm, 0, 0 }, - { X86::SETOr, X86::SETOm, 0, 0 }, - { X86::SETPr, X86::SETPm, 0, 0 }, - { X86::SETSr, X86::SETSm, 0, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, - { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 }, - { X86::TEST16ri, X86::TEST16mi, 1, 0 }, - { X86::TEST32ri, X86::TEST32mi, 1, 0 }, - { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, - { X86::TEST8ri, X86::TEST8mi, 1, 0 } + unsigned MemOp = OpTbl2Addr[i][1]; + unsigned Flags = OpTbl2Addr[i][2]; + 
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, + RegOp, MemOp, + // Index 0, folded load and store, no alignment requirement. + Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); + } + + static const unsigned OpTbl0[][3] = { + { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, + { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, + { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD }, + { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, + { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, + { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, + { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, + { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, + { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, + { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, + { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, + { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, + { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, + { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, + { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, + { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, + { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, + { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, + { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, + { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, + { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, + { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, + { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, + { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, + { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, + { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, + { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, + { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, + { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, + { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, + { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, + { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, + { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, + { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, + { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, + { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, + { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, + { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, + { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, + { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, + { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, + { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, + { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, + { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, + { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, + { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, + { X86::SETLEr, 
X86::SETLEm, TB_FOLDED_STORE }, + { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, + { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, + { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, + { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, + { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, + { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, + { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, + { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, + { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, + { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, + { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, + // AVX 128-bit versions of foldable instructions + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, + { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, + { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, + { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, + // AVX 256-bit foldable instructions + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS; - unsigned FoldedLoad = OpTbl0[i][2]; - unsigned Align = OpTbl0[i][3]; - assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); - RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl0[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 0, folded load or store. 
- unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); - assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); + unsigned MemOp = OpTbl0[i][1]; + unsigned Flags = OpTbl0[i][2]; + AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, + RegOp, MemOp, TB_INDEX_0 | Flags); } static const unsigned OpTbl1[][3] = { - { X86::CMP16rr, X86::CMP16rm, 0 }, - { X86::CMP32rr, X86::CMP32rm, 0 }, - { X86::CMP64rr, X86::CMP64rm, 0 }, - { X86::CMP8rr, X86::CMP8rm, 0 }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 }, - { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 }, - { X86::IMUL16rri, X86::IMUL16rmi, 0 }, - { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, - { X86::IMUL32rri, X86::IMUL32rmi, 0 }, - { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, - { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, - { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, - { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, - { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, - { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, - { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 }, - { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, - { X86::MOV16rr, X86::MOV16rm, 0 }, - { X86::MOV32rr, X86::MOV32rm, 0 }, - { X86::MOV64rr, X86::MOV64rm, 0 }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, - { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, - { X86::MOV8rr, X86::MOV8rm, 0 }, - { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, - { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 }, - { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, - { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, - { X86::MOVSX64rr16, 
X86::MOVSX64rm16, 0 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, - { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, - { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, - { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, - { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, - { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, - { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, - { X86::PSHUFDri, X86::PSHUFDmi, 16 }, - { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, - { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, - { X86::RCPPSr, X86::RCPPSm, 16 }, - { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, - { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, - { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, - { X86::SQRTPDr, X86::SQRTPDm, 16 }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, - { X86::SQRTPSr, X86::SQRTPSm, 16 }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, - { X86::SQRTSDr, X86::SQRTSDm, 0 }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, - { X86::SQRTSSr, X86::SQRTSSm, 0 }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, - { X86::TEST16rr, X86::TEST16rm, 0 }, - { X86::TEST32rr, X86::TEST32rm, 0 }, - { X86::TEST64rr, X86::TEST64rm, 0 }, - { X86::TEST8rr, X86::TEST8rm, 0 }, + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE }, + { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, + { 
X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, + { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, + { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 }, + { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, + { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 }, + { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, - { X86::UCOMISSrr, X86::UCOMISSrm, 0 } + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, + // AVX 128-bit versions of foldable instructions + { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, + { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, + { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 }, + { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 }, + { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 }, + { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 }, + { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 }, + { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, + { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, + { 
X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE }, + { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE }, + { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, + { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, + { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, + { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 }, + { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, + { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, + { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, + { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 }, + { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 }, + { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, + { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, + { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, + { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, + { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 }, + { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 }, + { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 }, + { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 }, + { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 }, + { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, + { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, + // AVX 256-bit foldable instructions + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS; - unsigned Align = OpTbl1[i][2]; - assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); - RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. 
- if (OpTbl1[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 1, folded load - unsigned AuxInfo = 1 | (1 << 4); - assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); + unsigned MemOp = OpTbl1[i][1]; + unsigned Flags = OpTbl1[i][2]; + AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, + RegOp, MemOp, + // Index 1, folded load + Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } static const unsigned OpTbl2[][3] = { - { X86::ADC32rr, X86::ADC32rm, 0 }, - { X86::ADC64rr, X86::ADC64rm, 0 }, - { X86::ADD16rr, X86::ADD16rm, 0 }, - { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD32rr, X86::ADD32rm, 0 }, - { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD64rr, X86::ADD64rm, 0 }, - { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD8rr, X86::ADD8rm, 0 }, - { X86::ADDPDrr, X86::ADDPDrm, 16 }, - { X86::ADDPSrr, X86::ADDPSrm, 16 }, - { X86::ADDSDrr, X86::ADDSDrm, 0 }, - { X86::ADDSSrr, X86::ADDSSrm, 0 }, - { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, - { X86::AND16rr, X86::AND16rm, 0 }, - { X86::AND32rr, X86::AND32rm, 0 }, - { X86::AND64rr, X86::AND64rm, 0 }, - { X86::AND8rr, X86::AND8rm, 0 }, - { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, - { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, - { X86::ANDPDrr, X86::ANDPDrm, 16 }, - { X86::ANDPSrr, X86::ANDPSrm, 16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, - { X86::CMPPDrri, X86::CMPPDrmi, 16 }, - { X86::CMPPSrri, X86::CMPPSrmi, 16 }, - { X86::CMPSDrr, X86::CMPSDrm, 0 }, 
- { X86::CMPSSrr, X86::CMPSSrm, 0 }, - { X86::DIVPDrr, X86::DIVPDrm, 16 }, - { X86::DIVPSrr, X86::DIVPSrm, 16 }, - { X86::DIVSDrr, X86::DIVSDrm, 0 }, - { X86::DIVSSrr, X86::DIVSSrm, 0 }, - { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, - { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, - { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, - { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, - { X86::FsORPDrr, X86::FsORPDrm, 16 }, - { X86::FsORPSrr, X86::FsORPSrm, 16 }, - { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, - { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, - { X86::HADDPDrr, X86::HADDPDrm, 16 }, - { X86::HADDPSrr, X86::HADDPSrm, 16 }, - { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, - { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, - { X86::IMUL16rr, X86::IMUL16rm, 0 }, - { X86::IMUL32rr, X86::IMUL32rm, 0 }, - { X86::IMUL64rr, X86::IMUL64rm, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, - { X86::MAXPDrr, X86::MAXPDrm, 16 }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, - { X86::MAXPSrr, X86::MAXPSrm, 16 }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, - { X86::MAXSDrr, X86::MAXSDrm, 0 }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, - { X86::MAXSSrr, X86::MAXSSrm, 0 }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, - { X86::MINPDrr, X86::MINPDrm, 16 }, - { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, - { X86::MINPSrr, X86::MINPSrm, 16 }, - { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, - { X86::MINSDrr, X86::MINSDrm, 0 }, - { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, - { X86::MINSSrr, X86::MINSSrm, 0 }, - { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, - { X86::MULPDrr, X86::MULPDrm, 16 }, - { X86::MULPSrr, X86::MULPSrm, 16 }, - { X86::MULSDrr, X86::MULSDrm, 0 }, - { X86::MULSSrr, X86::MULSSrm, 0 }, - { X86::OR16rr, X86::OR16rm, 0 }, - { X86::OR32rr, X86::OR32rm, 0 }, - { X86::OR64rr, X86::OR64rm, 0 }, - { X86::OR8rr, X86::OR8rm, 0 }, - { X86::ORPDrr, X86::ORPDrm, 16 }, - { X86::ORPSrr, X86::ORPSrm, 16 }, - { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, - { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, - { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, - { X86::PADDBrr, X86::PADDBrm, 16 }, - { X86::PADDDrr, X86::PADDDrm, 16 }, - { X86::PADDQrr, X86::PADDQrm, 16 }, - { X86::PADDSBrr, X86::PADDSBrm, 16 }, - { X86::PADDSWrr, X86::PADDSWrm, 16 }, - { X86::PADDWrr, X86::PADDWrm, 16 }, - { X86::PANDNrr, X86::PANDNrm, 16 }, - { X86::PANDrr, X86::PANDrm, 16 }, - { X86::PAVGBrr, X86::PAVGBrm, 16 }, - { X86::PAVGWrr, X86::PAVGWrm, 16 }, - { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, - { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, - { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, - { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, - { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, - { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, - { X86::PINSRWrri, X86::PINSRWrmi, 16 }, - { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, - { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, - { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, - { X86::PMINSWrr, X86::PMINSWrm, 16 }, - { X86::PMINUBrr, X86::PMINUBrm, 16 }, - { X86::PMULDQrr, X86::PMULDQrm, 16 }, - { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, - { X86::PMULHWrr, X86::PMULHWrm, 16 }, - { X86::PMULLDrr, X86::PMULLDrm, 16 }, - { X86::PMULLWrr, X86::PMULLWrm, 16 }, - { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, - { X86::PORrr, X86::PORrm, 16 }, - { X86::PSADBWrr, X86::PSADBWrm, 16 }, - { X86::PSLLDrr, X86::PSLLDrm, 16 }, - { X86::PSLLQrr, X86::PSLLQrm, 16 }, - { X86::PSLLWrr, X86::PSLLWrm, 16 }, - { X86::PSRADrr, X86::PSRADrm, 16 }, - { X86::PSRAWrr, X86::PSRAWrm, 16 }, - { X86::PSRLDrr, X86::PSRLDrm, 16 }, - { X86::PSRLQrr, X86::PSRLQrm, 16 }, - { X86::PSRLWrr, X86::PSRLWrm, 16 }, - { 
X86::PSUBBrr, X86::PSUBBrm, 16 }, - { X86::PSUBDrr, X86::PSUBDrm, 16 }, - { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, - { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, - { X86::PSUBWrr, X86::PSUBWrm, 16 }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, - { X86::PXORrr, X86::PXORrm, 16 }, - { X86::SBB32rr, X86::SBB32rm, 0 }, - { X86::SBB64rr, X86::SBB64rm, 0 }, - { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, - { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, - { X86::SUB16rr, X86::SUB16rm, 0 }, - { X86::SUB32rr, X86::SUB32rm, 0 }, - { X86::SUB64rr, X86::SUB64rm, 0 }, - { X86::SUB8rr, X86::SUB8rm, 0 }, - { X86::SUBPDrr, X86::SUBPDrm, 16 }, - { X86::SUBPSrr, X86::SUBPSrm, 16 }, - { X86::SUBSDrr, X86::SUBSDrm, 0 }, - { X86::SUBSSrr, X86::SUBSSrm, 0 }, + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, + { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, + { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, + { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 
}, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, + { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, + { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 }, + { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 }, + { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 }, + { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 }, + { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 }, + { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 }, + { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 }, + { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 }, + { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, + { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, + { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, + { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, + { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 }, + { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, + { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, + { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, + { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, + { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, + { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, + { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, + { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, + { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, + { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, + { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, + { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, 
TB_ALIGN_16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, + { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, + { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, + { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, + { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, + { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, + { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, + { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, + { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, + { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, + { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, + { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, + { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, + { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, + { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, + { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, + { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, + { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, + { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, + { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, // FIXME: TEST*rr -> swapped operand of TEST*mr. 
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, - { X86::XOR16rr, X86::XOR16rm, 0 }, - { X86::XOR32rr, X86::XOR32rm, 0 }, - { X86::XOR64rr, X86::XOR64rm, 0 }, - { X86::XOR8rr, X86::XOR8rm, 0 }, - { X86::XORPDrr, X86::XORPDrm, 16 }, - { X86::XORPSrr, X86::XORPSrm, 16 } + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, + { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, + // AVX 128-bit versions of foldable instructions + { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, + { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, + { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, + { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, + { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, + { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, + { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, + { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, + { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, + { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, + { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, + { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, + { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, + { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, + { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, + { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 }, + { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 }, + { X86::VADDSDrr, X86::VADDSDrm, 0 }, + { X86::VADDSSrr, X86::VADDSSrm, 0 }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 }, + { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 }, + { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 }, + { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 }, + { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 }, + { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, + { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, + { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 }, + { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 }, + { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, + { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, + { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 }, + { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 }, + { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 }, + { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 }, + { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 }, + { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 }, + { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 }, + { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 }, + { X86::VHADDPDrr, X86::VHADDPDrm, 
TB_ALIGN_16 }, + { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 }, + { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, + { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, + { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 }, + { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 }, + { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 }, + { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 }, + { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, + { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, + { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, + { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, + { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 }, + { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 }, + { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 }, + { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 }, + { X86::VMINSDrr, X86::VMINSDrm, 0 }, + { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, + { X86::VMINSSrr, X86::VMINSSrm, 0 }, + { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, + { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, + { X86::VMULSDrr, X86::VMULSDrm, 0 }, + { X86::VMULSSrr, X86::VMULSSrm, 0 }, + { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 }, + { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, + { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, + { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, + { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, + { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, + { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, + { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, + { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, + { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, + { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, + { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, + { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, + { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, + { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, + { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, + { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, + { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, + { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, + { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, + { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, + { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 }, + { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 }, + { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 }, + { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 }, + { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 }, + { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 }, + { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 }, + { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 }, + { 
X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 }, + { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 }, + { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 }, + { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 }, + { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, + { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, + { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, + { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 } + // FIXME: add AVX 256-bit foldable instructions }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS; - unsigned Align = OpTbl2[i][2]; - - assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); - RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl2[i][1] & TB_NOT_REVERSABLE) - continue; + unsigned MemOp = OpTbl2[i][1]; + unsigned Flags = OpTbl2[i][2]; + AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, + RegOp, MemOp, + // Index 2, folded load + Flags | TB_INDEX_2 | TB_FOLDED_LOAD); + } +} - // Index 2, folded load - unsigned AuxInfo = 2 | (1 << 4); - assert(!MemOp2RegOpTable.count(MemOp) && +void +X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags) { + if ((Flags & TB_NO_FORWARD) == 0) { + assert(!R2MTable.count(RegOp) && "Duplicate entry!"); + R2MTable[RegOp] = std::make_pair(MemOp, Flags); + } + if ((Flags & TB_NO_REVERSE) == 0) { + assert(!M2RTable.count(MemOp) && "Duplicated entries in unfolding maps?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); - } + M2RTable[MemOp] = std::make_pair(RegOp, Flags); + } } bool @@ -796,6 +1000,11 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOVAPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: case X86::VMOVAPSYrm: case X86::VMOVAPDYrm: case X86::VMOVDQAYrm: @@ -820,6 +1029,11 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOVAPSmr: case X86::MOVAPDmr: case X86::MOVDQAmr: + case X86::VMOVSSmr: + case X86::VMOVSDmr: + case X86::VMOVAPSmr: + case X86::VMOVAPDmr: + case X86::VMOVDQAmr: case X86::VMOVAPSYmr: case X86::VMOVAPDYmr: case X86::VMOVDQAYmr: @@ -852,24 +1066,6 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return 0; } -bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { - if ((*o)->isLoad() && (*o)->getValue()) - if (const FixedStackPseudoSourceValue 
*Value = - dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } - return false; -} - unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) @@ -892,24 +1088,6 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, return 0; } -bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { - if ((*o)->isStore() && (*o)->getValue()) - if (const FixedStackPseudoSourceValue *Value = - dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } - return false; -} - /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -941,12 +1119,20 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVUPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: case X86::FsMOVAPSrm: case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. @@ -1009,15 +1195,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator E = MBB.end(); - // It's always safe to clobber EFLAGS at the end of a block. - if (I == E) - return true; - // For compile time consideration, if we are not able to determine the // safety after visiting 4 instructions in each direction, we will assume // it's not safe. MachineBasicBlock::iterator Iter = I; - for (unsigned i = 0; i < 4; ++i) { + for (unsigned i = 0; Iter != E && i < 4; ++i) { bool SeenDef = false; for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { MachineOperand &MO = Iter->getOperand(j); @@ -1037,10 +1219,16 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, // Skip over DBG_VALUE. while (Iter != E && Iter->isDebugValue()) ++Iter; + } - // If we make it to the end of the block, it's safe to clobber EFLAGS. - if (Iter == E) - return true; + // It is safe to clobber EFLAGS at the end of a block of no successor has it + // live in. + if (Iter == E) { + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(X86::EFLAGS)) + return false; + return true; } MachineBasicBlock::iterator B = MBB.begin(); @@ -1946,7 +2134,8 @@ static bool isHReg(unsigned Reg) { } // Try and copy between VR128/VR64 and GR64 registers. -static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { +static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, + bool HasAVX) { // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) // SrcReg(GR64) -> DestReg(VR128) @@ -1955,7 +2144,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { if (X86::GR64RegClass.contains(DestReg)) { if (X86::VR128RegClass.contains(SrcReg)) { // Copy from a VR128 register to a GR64 register. 
- return X86::MOVPQIto64rr; + return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; } else if (X86::VR64RegClass.contains(SrcReg)) { // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; @@ -1963,12 +2152,23 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) - return X86::MOV64toPQIrr; + return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. else if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } + // SrcReg(FR32) -> DestReg(GR32) + // SrcReg(GR32) -> DestReg(FR32) + + if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) + // Copy from a FR32 register to a GR32 register. + return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + + if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + return 0; } @@ -1977,6 +2177,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; @@ -1988,18 +2189,21 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget<X86Subtarget>().is64Bit()) + TM.getSubtarget<X86Subtarget>().is64Bit()) { Opc = X86::MOV8rr_NOREX; - else + // Both operands must be encodable without an REX prefix. + assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && + "8-bit H register can not be copied outside GR8_NOREX"); + } else Opc = X86::MOV8rr; } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) - Opc = X86::MOVAPSrr; + Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -2043,6 +2247,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool isStackAligned, const TargetMachine &TM, bool load) { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); switch (RC->getSize()) { default: llvm_unreachable("Unknown spill size"); @@ -2061,7 +2266,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, if (X86::GR32RegClass.hasSubClassEq(RC)) return load ? X86::MOV32rm : X86::MOV32mr; if (X86::FR32RegClass.hasSubClassEq(RC)) - return load ? X86::MOVSSrm : X86::MOVSSmr; + return load ? + (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) : + (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); if (X86::RFP32RegClass.hasSubClassEq(RC)) return load ? X86::LD_Fp32m : X86::ST_Fp32m; llvm_unreachable("Unknown 4-byte regclass"); @@ -2069,7 +2276,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, if (X86::GR64RegClass.hasSubClassEq(RC)) return load ? X86::MOV64rm : X86::MOV64mr; if (X86::FR64RegClass.hasSubClassEq(RC)) - return load ? X86::MOVSDrm : X86::MOVSDmr; + return load ? + (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) : + (HasAVX ? 
X86::VMOVSDmr : X86::MOVSDmr); if (X86::VR64RegClass.hasSubClassEq(RC)) return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; if (X86::RFP64RegClass.hasSubClassEq(RC)) @@ -2078,13 +2287,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, case 10: assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; - case 16: + case 16: { assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) - return load ? X86::MOVAPSrm : X86::MOVAPSmr; + return load ? + (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) : + (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); else - return load ? X86::MOVUPSrm : X86::MOVUPSmr; + return load ? + (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) : + (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } case 32: assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); // If stack is realigned we can use aligned stores. @@ -2118,7 +2332,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2133,7 +2348,9 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = MMOBegin != MMOEnd && + (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2151,7 +2368,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2164,7 +2382,9 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = MMOBegin != MMOEnd && + (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2174,6 +2394,40 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } +/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr +/// instruction with two undef reads of the register being defined. 
This is +/// used for mapping: +/// %xmm4 = V_SET0 +/// to: +/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef> +/// +static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) { + assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); + unsigned Reg = MI->getOperand(0).getReg(); + MI->setDesc(Desc); + + // MachineInstr::addOperand() will insert explicit operands before any + // implicit operands. + MachineInstrBuilder(MI).addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + // But we don't trust that. + assert(MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getReg() == Reg && "Misplaced operand"); + return true; +} + +bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + switch (MI->getOpcode()) { + case X86::V_SET0: + return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr)); + case X86::TEST8ri_NOREX: + MI->setDesc(get(X86::TEST8ri)); + return true; + } + return false; +} + MachineInstr* X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -2305,7 +2559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr->find(MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; - unsigned MinAlign = I->second.second; + unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; if (Align < MinAlign) return NULL; bool NarrowToMOV32rm = false; @@ -2352,6 +2606,51 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; } +/// hasPartialRegUpdate - Return true for all instructions that only update +/// the first 32 or 64-bits of the destination register and leave the rest +/// unmodified. This can be used to avoid folding loads if the instructions +/// only update part of the destination register, and the non-updated part is +/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these +/// instructions breaks the partial register dependency and it can improve +/// performance. e.g.: +/// +/// movss (%rdi), %xmm0 +/// cvtss2sd %xmm0, %xmm0 +/// +/// Instead of +/// cvtss2sd (%rdi), %xmm0 +/// +/// FIXME: This should be turned into a TSFlags. 
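(Aside, not part of the patch: both foldMemoryOperandImpl overloads further down — the stack-slot form and the load-instruction form — previously duplicated this opcode list as an inline switch; they now share the single hasPartialRegUpdate predicate. As before, the fold is still performed when the function is marked OptimizeForSize, since the bail-out is purely a speed heuristic against partial-register-update stalls.)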
+/// +static bool hasPartialRegUpdate(unsigned Opcode) { + switch (Opcode) { + case X86::CVTSD2SSrr: + case X86::Int_CVTSD2SSrr: + case X86::CVTSS2SDrr: + case X86::Int_CVTSS2SDrr: + case X86::RCPSSr: + case X86::RCPSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: + case X86::RSQRTSSr: + case X86::RSQRTSSr_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + // AVX encoded versions + case X86::VCVTSD2SSrr: + case X86::Int_VCVTSD2SSrr: + case X86::VCVTSS2SDrr: + case X86::Int_VCVTSS2SDrr: + case X86::VRCPSSr: + case X86::VROUNDSDr: + case X86::VROUNDSSr: + case X86::VRSQRTSSr: + case X86::VSQRTSSr: + return true; + } + + return false; +} MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, @@ -2360,22 +2659,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check switch flag if (NoFusing) return NULL; - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - switch (MI->getOpcode()) { - case X86::CVTSD2SSrr: - case X86::Int_CVTSD2SSrr: - case X86::CVTSS2SDrr: - case X86::Int_CVTSS2SDrr: - case X86::RCPSSr: - case X86::RCPSSr_Int: - case X86::ROUNDSDr: - case X86::ROUNDSSr: - case X86::RSQRTSSr: - case X86::RSQRTSSr_Int: - case X86::SQRTSSr: - case X86::SQRTSSr_Int: - return 0; - } + // Unless optimizing for size, don't fold to avoid partial + // register update stalls + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + hasPartialRegUpdate(MI->getOpcode())) + return 0; const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); @@ -2412,22 +2700,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check switch flag if (NoFusing) return NULL; - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - switch (MI->getOpcode()) { - case X86::CVTSD2SSrr: - case X86::Int_CVTSD2SSrr: - case X86::CVTSS2SDrr: - case X86::Int_CVTSS2SDrr: - case X86::RCPSSr: - case X86::RCPSSr_Int: - case X86::ROUNDSDr: - case X86::ROUNDSSr: - case X86::RSQRTSSr: - case X86::RSQRTSSr_Int: - case X86::SQRTSSr: - case X86::SQRTSSr_Int: - return 0; - } + // Unless optimizing for size, don't fold to avoid partial + // register update stalls + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + hasPartialRegUpdate(MI->getOpcode())) + return 0; // Determine the alignment of the load. unsigned Alignment = 0; @@ -2439,13 +2716,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PDY: Alignment = 32; break; - case X86::V_SET0PS: - case X86::V_SET0PD: - case X86::V_SET0PI: + case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PS: - case X86::AVX_SET0PD: - case X86::AVX_SET0PI: + case X86::AVX_SETALLONES: Alignment = 16; break; case X86::FsFLD0SD: @@ -2481,18 +2754,16 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { - case X86::V_SET0PS: - case X86::V_SET0PD: - case X86::V_SET0PI: + case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PS: - case X86::AVX_SET0PD: - case X86::AVX_SET0PI: case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: + case X86::AVX_SETALLONES: case X86::FsFLD0SD: - case X86::FsFLD0SS: { - // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. + case X86::FsFLD0SS: + case X86::VFsFLD0SD: + case X86::VFsFLD0SS: { + // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
// Medium and large mode can't fold loads this way. @@ -2515,7 +2786,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry. MachineConstantPool &MCP = *MF.getConstantPool(); - const Type *Ty; + Type *Ty; unsigned Opc = LoadMI->getOpcode(); if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); @@ -2525,9 +2796,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? - Constant::getAllOnesValue(Ty) : - Constant::getNullValue(Ty); + + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES); + const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : + Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); // Create operands to load from the constant pool entry. @@ -2615,9 +2887,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; - unsigned Index = I->second.second & 0xf; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + unsigned Index = I->second.second & TB_INDEX_MASK; + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; if (UnfoldLoad && !FoldedLoad) return false; UnfoldLoad &= FoldedLoad; @@ -2743,9 +3015,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; - unsigned Index = I->second.second & 0xf; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + unsigned Index = I->second.second & TB_INDEX_MASK; + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); unsigned NumDefs = MCID.NumDefs; @@ -2780,7 +3052,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) // Do not introduce a slow unaligned load. return false; - bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (*MMOs.first) && + (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2822,7 +3096,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) // Do not introduce a slow unaligned store. return false; - bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 
32 : 16; + bool isAligned = (*MMOs.first) && + (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -2843,14 +3119,14 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, MemOp2RegOpTable.find(Opc); if (I == MemOp2RegOpTable.end()) return 0; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; if (UnfoldLoad && !FoldedLoad) return 0; if (UnfoldStore && !FoldedStore) return 0; if (LoadRegIndex) - *LoadRegIndex = I->second.second & 0xf; + *LoadRegIndex = I->second.second & TB_INDEX_MASK; return I->second.first; } @@ -2881,6 +3157,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + // AVX load instructions + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: @@ -2908,6 +3194,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + // AVX load instructions + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: @@ -3007,31 +3303,6 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } - -/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) -/// register? e.g. r8, xmm8, xmm13, etc. -bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { - switch (RegNo) { - default: break; - case X86::R8: case X86::R9: case X86::R10: case X86::R11: - case X86::R12: case X86::R13: case X86::R14: case X86::R15: - case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: - case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: - case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: - case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: - case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: - case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: - case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: - case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: - case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: - case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: - case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: - case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: - return true; - } - return false; -} - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. 
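The hunks above replace the hard-coded fold-table decoding (0xf for the operand index, 1 << 4 and 1 << 5 for the folded-load and folded-store bits, plus the raw MinAlign field) with named TB_* masks. A standalone C++ sketch of such a packed table entry follows; the bit layout used here is an assumption for illustration only, since the real TB_* constants are defined in X86InstrInfo.h and are not shown in this diff:

#include <cassert>
#include <cstdint>

// Assumed layout of the flags word stored alongside each fold-table entry.
enum : uint32_t {
  TB_INDEX_MASK   = 0x000f,      // operand index the memory operand folds into
  TB_FOLDED_LOAD  = 1u << 4,     // entry represents a folded load
  TB_FOLDED_STORE = 1u << 5,     // entry represents a folded store
  TB_ALIGN_SHIFT  = 8,           // required alignment stored in the high bits
  TB_ALIGN_MASK   = 0xffu << TB_ALIGN_SHIFT,
};

uint32_t makeEntry(unsigned Index, bool Load, bool Store, unsigned Align) {
  return (Index & TB_INDEX_MASK) |
         (Load  ? TB_FOLDED_LOAD  : 0) |
         (Store ? TB_FOLDED_STORE : 0) |
         (Align << TB_ALIGN_SHIFT);
}

int main() {
  uint32_t Flags = makeEntry(/*Index=*/1, /*Load=*/true, /*Store=*/false, /*Align=*/16);
  // The same decodes the patched code performs with the named masks.
  unsigned Index    = Flags & TB_INDEX_MASK;
  bool FoldedLoad   = Flags & TB_FOLDED_LOAD;
  bool FoldedStore  = Flags & TB_FOLDED_STORE;
  unsigned MinAlign = (Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
  assert(Index == 1 && FoldedLoad && !FoldedStore && MinAlign == 16);
  return 0;
}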
@@ -3072,7 +3343,6 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, - { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, // AVX 128-bit support @@ -3088,7 +3358,6 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, - { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, // AVX 256-bit support @@ -3111,13 +3380,13 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) { } std::pair<uint16_t, uint16_t> -X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { +X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; return std::make_pair(domain, domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); } -void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { +void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); @@ -3158,6 +3427,29 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::SQRTSSm_Int: case X86::SQRTSSr: case X86::SQRTSSr_Int: + // AVX instructions with high latency + case X86::VDIVSDrm: + case X86::VDIVSDrm_Int: + case X86::VDIVSDrr: + case X86::VDIVSDrr_Int: + case X86::VDIVSSrm: + case X86::VDIVSSrm_Int: + case X86::VDIVSSrr: + case X86::VDIVSSrr_Int: + case X86::VSQRTPDm: + case X86::VSQRTPDm_Int: + case X86::VSQRTPDr: + case X86::VSQRTPDr_Int: + case X86::VSQRTPSm: + case X86::VSQRTPSm_Int: + case X86::VSQRTPSr: + case X86::VSQRTPSr_Int: + case X86::VSQRTSDm: + case X86::VSQRTSDm_Int: + case X86::VSQRTSDr: + case X86::VSQRTSSm: + case X86::VSQRTSSm_Int: + case X86::VSQRTSSr: return true; } } diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index 5f2eba3..97009db 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -27,24 +27,6 @@ namespace llvm { class X86TargetMachine; namespace X86 { - // Enums for memory operand decoding. Each memory operand is represented with - // a 5 operand sequence in the form: - // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] - // These enums help decode this. - enum { - AddrBaseReg = 0, - AddrScaleAmt = 1, - AddrIndexReg = 2, - AddrDisp = 3, - - /// AddrSegmentReg - The operand # of the segment in the memory operand. - AddrSegmentReg = 4, - - /// AddrNumOperands - Total number of operands in a memory reference. - AddrNumOperands = 5 - }; - - // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -82,133 +64,8 @@ namespace X86 { /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(X86::CondCode CC); +} // end namespace X86; -} - -/// X86II - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace X86II { - /// Target Operand Flag enum. 
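The getExecutionDomain hook renamed above reports an instruction's current SSE execution domain together with a bitmask of the domains it could legally be moved to, which is 0xe exactly when the opcode has a row in the ReplaceableInstrs table. A standalone C++ sketch of that lookup, using invented opcode values rather than the real X86 enums:

#include <cassert>
#include <cstdint>
#include <utility>

// Invented opcodes; the real table pairs instructions such as
// ANDPSrr / ANDPDrr / PANDrr across the three packed domains.
enum Opcode : unsigned { ANDPSrr = 1, ANDPDrr, PANDrr, ADD32rr };

// Each row lists the {PackedSingle, PackedDouble, PackedInt} equivalents.
static const unsigned ReplaceableInstrs[][3] = {
  { ANDPSrr, ANDPDrr, PANDrr },
};

// Return the row that contains Opcode in the column for Domain (1-3).
static const unsigned *lookup(unsigned Opcode, unsigned Domain) {
  for (const auto &Row : ReplaceableInstrs)
    if (Row[Domain - 1] == Opcode)
      return Row;
  return nullptr;
}

// First element: current domain (0 means "not an SSE instruction").
// Second element: bitmask of domains the instruction may be rewritten
// into; bits 1-3 stand for the three packed domains, hence 0xe.
std::pair<uint16_t, uint16_t> getExecutionDomain(unsigned Opcode,
                                                 unsigned Domain) {
  return {static_cast<uint16_t>(Domain),
          static_cast<uint16_t>(Domain && lookup(Opcode, Domain) ? 0xe : 0)};
}

int main() {
  assert(getExecutionDomain(ANDPSrr, 1).second == 0xe); // has equivalents
  assert(getExecutionDomain(ADD32rr, 0).second == 0);   // not an SSE op
  return 0;
}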
- enum TOF { - //===------------------------------------------------------------------===// - // X86 Specific MachineOperand flags. - - MO_NO_FLAG, - - /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a - /// relocation of: - /// SYMBOL_LABEL + [. - PICBASELABEL] - MO_GOT_ABSOLUTE_ADDRESS, - - /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the - /// immediate should get the value of the symbol minus the PIC base label: - /// SYMBOL_LABEL - PICBASELABEL - MO_PIC_BASE_OFFSET, - - /// MO_GOT - On a symbol operand this indicates that the immediate is the - /// offset to the GOT entry for the symbol name from the base of the GOT. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOT - MO_GOT, - - /// MO_GOTOFF - On a symbol operand this indicates that the immediate is - /// the offset to the location of the symbol name from the base of the GOT. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOTOFF - MO_GOTOFF, - - /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is - /// offset to the GOT entry for the symbol name from the current code - /// location. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOTPCREL - MO_GOTPCREL, - - /// MO_PLT - On a symbol operand this indicates that the immediate is - /// offset to the PLT entry of symbol name from the current code location. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @PLT - MO_PLT, - - /// MO_TLSGD - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @TLSGD - MO_TLSGD, - - /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @GOTTPOFF - MO_GOTTPOFF, - - /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @INDNTPOFF - MO_INDNTPOFF, - - /// MO_TPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @TPOFF - MO_TPOFF, - - /// MO_NTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @NTPOFF - MO_NTPOFF, - - /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "__imp_FOO" symbol. This is used for - /// dllimport linkage on windows. - MO_DLLIMPORT, - - /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$stub" symbol. This is used for calls - /// and jumps to external functions on Tiger and earlier. - MO_DARWIN_STUB, - - /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a - /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. - MO_DARWIN_NONLAZY, - - /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates - /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is - /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. 
- MO_DARWIN_NONLAZY_PIC_BASE, - - /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this - /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", - /// which is a PIC-base-relative reference to a hidden dyld lazy pointer - /// stub. - MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, - - /// MO_TLVP - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// This is the TLS offset for the Darwin TLS mechanism. - MO_TLVP, - - /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate - /// is some TLS offset from the picbase. - /// - /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. - MO_TLVP_PIC_BASE - }; -} /// isGlobalStubReference - Return true if the specified TargetFlag operand is /// a reference to a stub for a global, not the global itself. @@ -243,353 +100,6 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { } } -/// X86II - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace X86II { - enum { - //===------------------------------------------------------------------===// - // Instruction encodings. These are the standard/most common forms for X86 - // instructions. - // - - // PseudoFrm - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - Pseudo = 0, - - /// Raw - This form is for instructions that don't have any operands, so - /// they are just a fixed opcode value, like 'leave'. - RawFrm = 1, - - /// AddRegFrm - This form is used for instructions like 'push r32' that have - /// their one register operand added to their opcode. - AddRegFrm = 2, - - /// MRMDestReg - This form is used for instructions that use the Mod/RM byte - /// to specify a destination, which in this case is a register. - /// - MRMDestReg = 3, - - /// MRMDestMem - This form is used for instructions that use the Mod/RM byte - /// to specify a destination, which in this case is memory. - /// - MRMDestMem = 4, - - /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte - /// to specify a source, which in this case is a register. - /// - MRMSrcReg = 5, - - /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte - /// to specify a source, which in this case is memory. - /// - MRMSrcMem = 6, - - /// MRM[0-7][rm] - These forms are used to represent instructions that use - /// a Mod/RM byte, and use the middle field to hold extended opcode - /// information. In the intel manual these are represented as /0, /1, ... - /// - - // First, instructions that operate on a register r/m operand... - MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3 - MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7 - - // Next, instructions that operate on a memory r/m operand... - MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3 - MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7 - - // MRMInitReg - This form is used for instructions whose source and - // destinations are the same register. - MRMInitReg = 32, - - //// MRM_C1 - A mod/rm byte of exactly 0xC1. 
- MRM_C1 = 33, - MRM_C2 = 34, - MRM_C3 = 35, - MRM_C4 = 36, - MRM_C8 = 37, - MRM_C9 = 38, - MRM_E8 = 39, - MRM_F0 = 40, - MRM_F8 = 41, - MRM_F9 = 42, - MRM_D0 = 45, - MRM_D1 = 46, - - /// RawFrmImm8 - This is used for the ENTER instruction, which has two - /// immediates, the first of which is a 16-bit immediate (specified by - /// the imm encoding) and the second is a 8-bit fixed value. - RawFrmImm8 = 43, - - /// RawFrmImm16 - This is used for CALL FAR instructions, which have two - /// immediates, the first of which is a 16 or 32-bit immediate (specified by - /// the imm encoding) and the second is a 16-bit fixed value. In the AMD - /// manual, this operand is described as pntr16:32 and pntr16:16 - RawFrmImm16 = 44, - - FormMask = 63, - - //===------------------------------------------------------------------===// - // Actual flags... - - // OpSize - Set if this instruction requires an operand size prefix (0x66), - // which most often indicates that the instruction operates on 16 bit data - // instead of 32 bit data. - OpSize = 1 << 6, - - // AsSize - Set if this instruction requires an operand size prefix (0x67), - // which most often indicates that the instruction address 16 bit address - // instead of 32 bit address (or 32 bit address in 64 bit mode). - AdSize = 1 << 7, - - //===------------------------------------------------------------------===// - // Op0Mask - There are several prefix bytes that are used to form two byte - // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is - // used to obtain the setting of this field. If no bits in this field is - // set, there is no prefix byte for obtaining a multibyte opcode. - // - Op0Shift = 8, - Op0Mask = 0x1F << Op0Shift, - - // TB - TwoByte - Set if this instruction has a two byte opcode, which - // starts with a 0x0F byte before the real opcode. - TB = 1 << Op0Shift, - - // REP - The 0xF3 prefix byte indicating repetition of the following - // instruction. - REP = 2 << Op0Shift, - - // D8-DF - These escape opcodes are used by the floating point unit. These - // values must remain sequential. - D8 = 3 << Op0Shift, D9 = 4 << Op0Shift, - DA = 5 << Op0Shift, DB = 6 << Op0Shift, - DC = 7 << Op0Shift, DD = 8 << Op0Shift, - DE = 9 << Op0Shift, DF = 10 << Op0Shift, - - // XS, XD - These prefix codes are for single and double precision scalar - // floating point operations performed in the SSE registers. - XD = 11 << Op0Shift, XS = 12 << Op0Shift, - - // T8, TA, A6, A7 - Prefix after the 0x0F prefix. - T8 = 13 << Op0Shift, TA = 14 << Op0Shift, - A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, - - // TF - Prefix before and after 0x0F - TF = 17 << Op0Shift, - - //===------------------------------------------------------------------===// - // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. - // They are used to specify GPRs and SSE registers, 64-bit operand size, - // etc. We only cares about REX.W and REX.R bits and only the former is - // statically determined. - // - REXShift = Op0Shift + 5, - REX_W = 1 << REXShift, - - //===------------------------------------------------------------------===// - // This three-bit field describes the size of an immediate operand. Zero is - // unused so that we can tell if we forgot to set a value. 
- ImmShift = REXShift + 1, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm8PCRel = 2 << ImmShift, - Imm16 = 3 << ImmShift, - Imm16PCRel = 4 << ImmShift, - Imm32 = 5 << ImmShift, - Imm32PCRel = 6 << ImmShift, - Imm64 = 7 << ImmShift, - - //===------------------------------------------------------------------===// - // FP Instruction Classification... Zero is non-fp instruction. - - // FPTypeMask - Mask for all of the FP types... - FPTypeShift = ImmShift + 3, - FPTypeMask = 7 << FPTypeShift, - - // NotFP - The default, set for instructions that do not use FP registers. - NotFP = 0 << FPTypeShift, - - // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0 - ZeroArgFP = 1 << FPTypeShift, - - // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst - OneArgFP = 2 << FPTypeShift, - - // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a - // result back to ST(0). For example, fcos, fsqrt, etc. - // - OneArgFPRW = 3 << FPTypeShift, - - // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an - // explicit argument, storing the result to either ST(0) or the implicit - // argument. For example: fadd, fsub, fmul, etc... - TwoArgFP = 4 << FPTypeShift, - - // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an - // explicit argument, but have no destination. Example: fucom, fucomi, ... - CompareFP = 5 << FPTypeShift, - - // CondMovFP - "2 operand" floating point conditional move instructions. - CondMovFP = 6 << FPTypeShift, - - // SpecialFP - Special instruction forms. Dispatch by opcode explicitly. - SpecialFP = 7 << FPTypeShift, - - // Lock prefix - LOCKShift = FPTypeShift + 3, - LOCK = 1 << LOCKShift, - - // Segment override prefixes. Currently we just need ability to address - // stuff in gs and fs segments. - SegOvrShift = LOCKShift + 1, - SegOvrMask = 3 << SegOvrShift, - FS = 1 << SegOvrShift, - GS = 2 << SegOvrShift, - - // Execution domain for SSE instructions in bits 23, 24. - // 0 in bits 23-24 means normal, non-SSE instruction. - SSEDomainShift = SegOvrShift + 2, - - OpcodeShift = SSEDomainShift + 2, - - //===------------------------------------------------------------------===// - /// VEX - The opcode prefix used by AVX instructions - VEXShift = OpcodeShift + 8, - VEX = 1U << 0, - - /// VEX_W - Has a opcode specific functionality, but is used in the same - /// way as REX_W is for regular SSE instructions. - VEX_W = 1U << 1, - - /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2 - /// address instructions in SSE are represented as 3 address ones in AVX - /// and the additional register is encoded in VEX_VVVV prefix. - VEX_4V = 1U << 2, - - /// VEX_I8IMM - Specifies that the last register used in a AVX instruction, - /// must be encoded in the i8 immediate field. This usually happens in - /// instructions with 4 operands. - VEX_I8IMM = 1U << 3, - - /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current - /// instruction uses 256-bit wide registers. This is usually auto detected - /// if a VR256 register is used, but some AVX instructions also have this - /// field marked when using a f256 memory references. - VEX_L = 1U << 4, - - /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the - /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents - /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction - /// storing a classifier in the imm8 field. 
To simplify our implementation, - /// we handle this by storeing the classifier in the opcode field and using - /// this flag to indicate that the encoder should do the wacky 3DNow! thing. - Has3DNow0F0FOpcode = 1U << 5 - }; - - // getBaseOpcodeFor - This function returns the "base" X86 opcode for the - // specified machine instruction. - // - static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { - return TSFlags >> X86II::OpcodeShift; - } - - static inline bool hasImm(uint64_t TSFlags) { - return (TSFlags & X86II::ImmMask) != 0; - } - - /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field - /// of the specified instruction. - static inline unsigned getSizeOfImm(uint64_t TSFlags) { - switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: - case X86II::Imm8PCRel: return 1; - case X86II::Imm16: - case X86II::Imm16PCRel: return 2; - case X86II::Imm32: - case X86II::Imm32PCRel: return 4; - case X86II::Imm64: return 8; - } - } - - /// isImmPCRel - Return true if the immediate of the specified instruction's - /// TSFlags indicates that it is pc relative. - static inline unsigned isImmPCRel(uint64_t TSFlags) { - switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8PCRel: - case X86II::Imm16PCRel: - case X86II::Imm32PCRel: - return true; - case X86II::Imm8: - case X86II::Imm16: - case X86II::Imm32: - case X86II::Imm64: - return false; - } - } - - /// getMemoryOperandNo - The function returns the MCInst operand # for the - /// first field of the memory operand. If the instruction doesn't have a - /// memory operand, this returns -1. - /// - /// Note that this ignores tied operands. If there is a tied register which - /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only - /// counted as one operand. - /// - static inline int getMemoryOperandNo(uint64_t TSFlags) { - switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); - default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); - case X86II::Pseudo: - case X86II::RawFrm: - case X86II::AddRegFrm: - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - case X86II::RawFrmImm8: - case X86II::RawFrmImm16: - return -1; - case X86II::MRMDestMem: - return 0; - case X86II::MRMSrcMem: { - bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; - unsigned FirstMemOp = 1; - if (HasVEX_4V) - ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). - - // FIXME: Maybe lea should have its own form? This is a horrible hack. 
- //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - // Opcode == X86::LEA16r || Opcode == X86::LEA32r) - return FirstMemOp; - } - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - return -1; - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - return 0; - case X86II::MRM_C1: - case X86II::MRM_C2: - case X86II::MRM_C3: - case X86II::MRM_C4: - case X86II::MRM_C8: - case X86II::MRM_C9: - case X86II::MRM_E8: - case X86II::MRM_F0: - case X86II::MRM_F8: - case X86II::MRM_F9: - case X86II::MRM_D0: - case X86II::MRM_D1: - return -1; - } - } -} - inline static bool isScale(const MachineOperand &MO) { return MO.isImm() && (MO.getImm() == 1 || MO.getImm() == 2 || @@ -621,14 +131,22 @@ class X86InstrInfo : public X86GenInstrInfo { /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. /// - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; + typedef DenseMap<unsigned, + std::pair<unsigned, unsigned> > RegOp2MemOpTableType; + RegOp2MemOpTableType RegOp2MemOpTable2Addr; + RegOp2MemOpTableType RegOp2MemOpTable0; + RegOp2MemOpTableType RegOp2MemOpTable1; + RegOp2MemOpTableType RegOp2MemOpTable2; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// - DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable; + typedef DenseMap<unsigned, + std::pair<unsigned, unsigned> > MemOp2RegOpTableType; + MemOp2RegOpTableType MemOp2RegOpTable; + + void AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags); public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -656,17 +174,6 @@ public: unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const; - /// hasLoadFromStackSlot - If the specified machine instruction has - /// a load from a stack slot, return true along with the FrameIndex - /// of the loaded stack slot and the machine mem operand containing - /// the reference. If not, return false. Unlike - /// isLoadFromStackSlot, this returns true for any instructions that - /// loads from the stack. This is a hint only and may not catch all - /// cases. - bool hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. This uses a heuristic so it isn't @@ -674,16 +181,6 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const; - /// hasStoreToStackSlot - If the specified machine instruction has a - /// store to a stack slot, return true along with the FrameIndex of - /// the loaded stack slot and the machine mem operand containing the - /// reference. If not, return false. Unlike isStoreToStackSlot, - /// this returns true for any instructions that loads from the - /// stack. This is a hint only and may not catch all cases. 
- bool hasStoreToStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -750,6 +247,9 @@ public: MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -829,32 +329,21 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - static bool isX86_64NonExtLowByteReg(unsigned reg) { - return (reg == X86::SPL || reg == X86::BPL || - reg == X86::SIL || reg == X86::DIL); - } - static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; - return isX86_64ExtendedReg(MO.getReg()); + return X86II::isX86_64ExtendedReg(MO.getReg()); } - /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or - /// higher) register? e.g. r8, xmm8, xmm13, etc. - static bool isX86_64ExtendedReg(unsigned RegNo); - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. /// unsigned getGlobalBaseReg(MachineFunction *MF) const; - /// GetSSEDomain - Return the SSE execution domain of MI as the first element, - /// and a bitmask of possible arguments to SetSSEDomain ase the second. - std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const; + std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr *MI) const; - /// SetSSEDomain - Set the SSEDomain of MI. 
- void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 7eb07b0..d54bf27 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2, def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; -def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>; @@ -97,6 +97,8 @@ def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; + def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -133,9 +135,13 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, +def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -218,12 +224,16 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; +def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; +def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, + [SDNPHasChain]>; + def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -331,6 +341,11 @@ class ImmSExtAsmOperandClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class ImmZExtAsmOperandClass : AsmOperandClass { + let SuperClasses = [ImmAsmOperand]; + let RenderMethod = "addImmOperands"; +} + // Sign-extended immediate classes. We don't need to define the full lattice // here because there is no instruction with an ambiguity between ImmSExti64i32 // and ImmSExti32i8. @@ -358,6 +373,12 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } +// [0, 0x000000FF] +def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass { + let Name = "ImmZExtu32u8"; +} + + // [0, 0x0000007F] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { @@ -377,6 +398,11 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } +// 32-bits but only 8 bits are significant, and those 8 bits are unsigned. 
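The new ImmZExtu32u8AsmOperand class above, together with the u32u8imm operand it backs (defined immediately below), lets the assembler accept 32-bit immediates whose value fits in an unsigned 8-bit field, alongside the existing sign-extended classes. A small standalone C++ sketch of the two acceptance rules; the function names are illustrative only:

#include <cassert>
#include <cstdint>

// ImmSExti32i8: representable as a sign-extended 8-bit field, i.e. [-128, 127].
bool fitsSExti32i8(int32_t Imm)  { return Imm >= -128 && Imm <= 127; }
// ImmZExtu32u8: representable as a zero-extended 8-bit field, i.e. [0, 255].
bool fitsZExtu32u8(uint32_t Imm) { return Imm <= 0xFF; }

int main() {
  assert(fitsSExti32i8(-1) && !fitsZExtu32u8(0xFFFFFFFFu)); // -1 only sign-extends
  assert(!fitsSExti32i8(200) && fitsZExtu32u8(200));        // 200 only zero-extends
  assert(fitsSExti32i8(100) && fitsZExtu32u8(100));         // small values fit both
  return 0;
}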
+def u32u8imm : Operand<i32> { + let ParserMatchClass = ImmZExtu32u8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { @@ -389,11 +415,13 @@ def i64i32imm : Operand<i64> { def i64i32imm_pcrel : Operand<i64> { let PrintMethod = "print_pcrel_imm"; let ParserMatchClass = X86AbsMemAsmOperand; + let OperandType = "OPERAND_PCREL"; } // 64-bits but only 8 bits are significant. def i64i8imm : Operand<i64> { let ParserMatchClass = ImmSExti64i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } def lea64_32mem : Operand<i32> { @@ -442,18 +470,33 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; +def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; +def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; +def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; +def HasF16C : Predicate<"Subtarget->hasF16C()">; +def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; +def HasBMI : Predicate<"Subtarget->hasBMI()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">, + AssemblerPredicate<"ModeNaCl">; +def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">, + AssemblerPredicate<"ModeNaCl,!Mode64Bit">; +def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">, + AssemblerPredicate<"ModeNaCl,Mode64Bit">; +def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">, + AssemblerPredicate<"!ModeNaCl">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" @@ -766,30 +809,30 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { -def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; -def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; -def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>; def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; } // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in -def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; +def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>; let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in -def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; +def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize; let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in -def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; +def STOSD : I<0xAB, RawFrm, (outs), 
(ins), "stos{l|d}", []>; let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; -def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; -def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; -def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; +def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>; +def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize; +def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>; def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; -def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; -def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; -def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; +def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>; +def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize; +def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>; def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; @@ -841,22 +884,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{b}\t{$src, %al|%al, $src}", []>, + "mov{b}\t{$src, %al|AL, $src}", []>, Requires<[In32BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), - "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize, + "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize, Requires<[In32BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), - "mov{l}\t{$src, %eax|%eax, $src}", []>, + "mov{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>; def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{b}\t{%al, $dst|$dst, %al}", []>, + "mov{b}\t{%al, $dst|$dst, AL}", []>, Requires<[In32BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), - "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize, + "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize, Requires<[In32BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), - "mov{l}\t{%eax, $dst|$dst, %eax}", []>, + "mov{l}\t{%eax, $dst|$dst, EAX}", []>, Requires<[In32BitMode]>; // FIXME: These definitions are utterly broken @@ -865,13 +908,13 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), // in question. 
/* def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; + "mov{q}\t{$src, %rax|RAX, $src}", []>; def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; + "mov{q}\t{$src, %rax|RAX, $src}", []>; def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + "mov{q}\t{%rax, $dst|$dst, RAX}", []>; def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + "mov{q}\t{%rax, $dst|$dst, RAX}", []>; */ @@ -926,7 +969,7 @@ let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; -let mayLoad = 1, +let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), @@ -1117,11 +1160,15 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), } def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), - "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), - "xchg{l}\t{$src, %eax|%eax, $src}", []>; + "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>; +// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding. +// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP. +def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), + "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>; def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), - "xchg{q}\t{$src, %rax|%rax, $src}", []>; + "xchg{q}\t{$src, %rax|RAX, $src}", []>; @@ -1172,7 +1219,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), - "cmpxchg16b\t$dst", []>, TB; + "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>; @@ -1261,6 +1308,104 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; //===----------------------------------------------------------------------===// +// MOVBE Instructions +// +let Predicates = [HasMOVBE] in { + def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "movbe{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8; + def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "movbe{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8; + def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "movbe{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8; + def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "movbe{w}\t{$src, $dst|$dst, $src}", + [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8; + def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movbe{l}\t{$src, $dst|$dst, $src}", + [(store (bswap GR32:$src), addr:$dst)]>, T8; + def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movbe{q}\t{$src, $dst|$dst, $src}", + [(store (bswap GR64:$src), addr:$dst)]>, T8; +} + +//===----------------------------------------------------------------------===// +// RDRAND Instruction +// +let Predicates 
= [HasRDRAND], Defs = [EFLAGS] in { + def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), + "rdrand{w}\t$dst", []>, OpSize, TB; + def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), + "rdrand{l}\t$dst", []>, TB; + def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), + "rdrand{q}\t$dst", []>, TB; +} + +//===----------------------------------------------------------------------===// +// LZCNT Instruction +// +let Predicates = [HasLZCNT], Defs = [EFLAGS] in { + def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS, + OpSize; + def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctlz (loadi16 addr:$src))), + (implicit EFLAGS)]>, XS, OpSize; + + def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS; + def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "lzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctlz (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "lzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>, + XS; + def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctlz (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} + +//===----------------------------------------------------------------------===// +// TZCNT Instruction +// +let Predicates = [HasBMI], Defs = [EFLAGS] in { + def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS, + OpSize; + def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz (loadi16 addr:$src))), + (implicit EFLAGS)]>, XS, OpSize; + + def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS; + def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>, + XS; + def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} + +//===----------------------------------------------------------------------===// // Subsystems. //===----------------------------------------------------------------------===// @@ -1646,3 +1791,9 @@ def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>; def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>; def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>; def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>; + +// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms. 
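The aliases defined just below accept xchg with the accumulator in either operand order. In 64-bit mode the 32-bit form is routed to the new XCHG32ar64 so it is never encoded as the 0x90 NOP byte; a real xchg %eax, %eax must still clear the upper half of RAX, because every 32-bit register write zero-extends. A standalone C++ model of that writeback rule, illustrative only:

#include <cassert>
#include <cstdint>

// x86-64 writeback rules: a 32-bit register write zero-extends into the
// full 64-bit register, while 16-bit writes leave the upper bits alone.
uint64_t write32(uint64_t /*Old*/, uint32_t Val) {
  return Val;                              // upper 32 bits cleared
}
uint64_t write16(uint64_t Old, uint16_t Val) {
  return (Old & ~0xffffULL) | Val;         // upper 48 bits preserved
}

int main() {
  uint64_t Rax = 0x1234567890abcdefULL;
  // Effect of xchg %eax, %eax on RAX: not a no-op, the top half is zeroed.
  Rax = write32(Rax, static_cast<uint32_t>(Rax));
  assert(Rax == 0x90abcdefULL);
  // Effect of xchg %ax, %ax on RAX: genuinely a no-op.
  uint64_t Rax2 = 0x1234567890abcdefULL;
  Rax2 = write16(Rax2, static_cast<uint16_t>(Rax2));
  assert(Rax2 == 0x1234567890abcdefULL);
  return 0;
}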
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>; +def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>; +def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; +def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index fe11d77..d3ced23 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -116,7 +116,217 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, } //===----------------------------------------------------------------------===// -// SSE 1 & 2 - Move Instructions +// Non-instruction patterns +//===----------------------------------------------------------------------===// + +// A vector extract of the first f32/f64 position is a subregister copy +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + +// A 128-bit subvector extract from the first 256-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))), + (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>; +def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))), + (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>; + +def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), + (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>; +def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), + (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; + +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), + (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), + (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; + +// A 128-bit subvector insert to the first 256-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +def : Pat<(v8f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; +def : Pat<(v4f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + +// Bitcasts between 128-bit vector types. 
Return the original type since +// no instruction is needed for the conversion +let Predicates = [HasXMMInt] in { + def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +} + +// Bitcasts between 256-bit vector types. 
Return the original type since +// no instruction is needed for the conversion +let Predicates = [HasAVX] in { + def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>; +} + +// Alias instructions that map fld0 to pxor for sse. +// FIXME: Set encoding to pseudo! +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in { + def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; + def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasSSE2]>, TB, OpSize; + def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; + def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; +} + +//===----------------------------------------------------------------------===// +// AVX & SSE - Zero/One Vectors +//===----------------------------------------------------------------------===// + +// Alias instruction that maps zero vector to pxor / xorp* for sse. +// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then +// swizzled by ExecutionDepsFix to pxor. 
+// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1, neverHasSideEffects = 1 in { +def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>; +} + +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; + + +// The same as done above but for AVX. The 256-bit ISA does not support PI, +// and doesn't need it because on sandy bridge the register is set to zero +// at the rename stage without using any execution unit, so SET0PSY +// and SET0PDY can be used for vector int instructions without penalty. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation, it does not expand the instructions below like +// X86MCInstLower does. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, Predicates = [HasAVX] in { +def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; +} + + +// AVX has no support for 256-bit integer instructions, but since the 128-bit +// VPXOR instruction writes zero to its upper part, it's safe to build zeros. +def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; + +def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; + +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-ones value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation, it does not expand the instructions below like +// X86MCInstLower does. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in + def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move FP Scalar Instructions +// +// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 +// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr +// is used instead. Register-to-register movss/movsd is not modeled as an +// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable +// in terms of a copy, and, as just mentioned, we don't use movss/movsd for copies.
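The blocks above rely on three facts worth seeing from the source side: vector-to-vector bitcasts select no instruction at all, an all-zeros vector is materialized as a self-xor (V_SET0, later expanded to xorps/pxor), and because VEX-encoded 128-bit instructions clear bits 255:128 of the YMM register, the 256-bit integer zero patterns can reuse V_SET0 through SUBREG_TO_REG. A rough C++ sketch of how this surfaces through the standard intrinsics (helper names are invented for illustration; assumes an AVX-enabled compile, e.g. -mavx):

#include <immintrin.h>

// A vector bitcast: no instruction is selected, the register is simply
// reinterpreted (the bitconvert patterns above).
__m128i as_ints(__m128 v) { return _mm_castps_si128(v); }

// An all-zeros 128-bit vector: a single xorps/pxor zero idiom (V_SET0).
__m128 zero128() { return _mm_setzero_ps(); }

// A 256-bit integer zero: typically a 128-bit vxor, relying on VEX-encoded
// 128-bit instructions zeroing the upper YMM lanes (the SUBREG_TO_REG
// patterns above).
__m256i zero256() { return _mm256_setzero_si256(); }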
//===----------------------------------------------------------------------===// class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> : @@ -130,28 +340,57 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))]>; -// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 -// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr -// is used instead. Register-to-register movss/movsd is not modeled as an -// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable -// in terms of a copy, and just mentioned, we don't use movss/movsd for copies. +// AVX def VMOVSSrr : sse12_move_rr<FR32, v4f32, - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V, + VEX_LIG; def VMOVSDrr : sse12_move_rr<FR64, v2f64, - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V, + VEX_LIG; -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; +// For the disassembler +let isCodeGenOnly = 1 in { + def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + XS, VEX_4V, VEX_LIG; + def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + XD, VEX_4V, VEX_LIG; +} +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX, + VEX_LIG; let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; + def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX, + VEX_LIG; } +def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG; +def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG; + +// SSE1 & 2 let Constraints = "$src1 = $dst" in { def MOVSSrr : sse12_move_rr<FR32, v4f32, "movss\t{$src2, $dst|$dst, $src2}">, XS; def MOVSDrr : sse12_move_rr<FR64, v2f64, "movsd\t{$src2, $dst|$dst, $src2}">, XD; + + // For the disassembler + let isCodeGenOnly = 1 in { + def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", []>, XS; + def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", []>, XD; + } } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -161,54 +400,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; } -let AddedComplexity = 15 in { -// Extract the low 32-bit value from one vector and insert it into another. -def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -// Extract the low 64-bit value from one vector and insert it into another. -def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; -} - -// Implicitly promote a 32-bit scalar to a vector. 
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; -// Implicitly promote a 64-bit scalar to a vector. -def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; -// Implicitly promote a 32-bit scalar to a vector. -def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; -// Implicitly promote a 64-bit scalar to a vector. -def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; - -let AddedComplexity = 20 in { -// MOVSSrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. -def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -// MOVSDrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. -def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -} - -// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; @@ -216,24 +407,257 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX; +// Patterns +let Predicates = [HasSSE1] in { + let AddedComplexity = 15 in { + // Extract the low 32-bit value from one vector and insert it into another. + def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSS to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + } -// Extract and store. 
-def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; -def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + } + + // Extract and store. + def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + + // Shuffle with MOVSS + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, FR32:$src2)>; + def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 15 in { + // Extract the low 64-bit value from one vector and insert it into another. + def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + + // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd + def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSD to the lower bits. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + } + + let AddedComplexity = 20 in { + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + } + + // Extract and store. 
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + + // Shuffle with MOVSD + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, FR64:$src2)>; + def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem + // is during lowering, where it's not possible to recognize the fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; +} + +let Predicates = [HasAVX] in { + let AddedComplexity = 15 in { + // Extract the low 32-bit value from one vector and insert it into another. + def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + + // Extract the low 64-bit value from one vector and insert it into another. + def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + + // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd + def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVS{S,D} to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (VMOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + } + + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + + // Represent the same patterns above but in the form they appear for + // 256-bit types + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + } + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), + (SUBREG_TO_REG (i32 0), + (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), + sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))), + (SUBREG_TO_REG (i64 0), + (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), + sub_xmm)>; + + // Extract and store. 
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (VMOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (VMOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + + // Shuffle with VMOVSS + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (VMOVSSrr VR128:$src1, FR32:$src2)>; + def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + + // Shuffle with VMOVSD + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (VMOVSDrr VR128:$src1, FR64:$src2)>; + def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), + sub_sd))>; + def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), + sub_sd))>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem + // is during lowering, where it's not possible to recognize the fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), + sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), + sub_sd))>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions +//===----------------------------------------------------------------------===// -// Move Aligned/Unaligned floating point values multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d, @@ -248,22 +672,22 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, VEX; + "movaps", SSEPackedSingle>, TB, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, OpSize, VEX; + "movapd", SSEPackedDouble>, TB, OpSize, VEX; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, VEX; + "movups", SSEPackedSingle>, TB, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble, 0>, OpSize, VEX; + "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, - "movaps", SSEPackedSingle>, VEX; + "movaps", SSEPackedSingle>, TB, VEX; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, - "movapd", SSEPackedDouble>, OpSize, VEX; + "movapd", SSEPackedDouble>, TB, OpSize, VEX; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, - "movups", SSEPackedSingle>, VEX; + "movups", SSEPackedSingle>, TB, VEX; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, 
f256mem, loadv4f64, - "movupd", SSEPackedDouble, 0>, OpSize, VEX; + "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -287,10 +711,10 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; @@ -298,6 +722,34 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; +// For disassembler +let isCodeGenOnly = 1 in { + def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movups\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movups\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", []>, VEX; +} + def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; @@ -319,24 +771,155 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)]>; -// Intrinsic forms of MOVUPS/D load and store -def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; -def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; - -def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>; -def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; - -// Move Low/High packed floating point values +// For disassembler +let isCodeGenOnly = 1 in { + def 
MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; + def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; + def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movups\t{$src, $dst|$dst, $src}", []>; + def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", []>; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), + (VMOVUPDmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE1] in + def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), + (MOVUPSmr addr:$dst, VR128:$src)>; +let Predicates = [HasSSE2] in + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), + (MOVUPDmr addr:$dst, VR128:$src)>; + +// Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. +let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + +// Use vmovaps/vmovups for AVX integer load/store. 
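The SSE1 block above and the HasAVX block that follows make the same size optimization: plain vector-integer loads and stores are selected as movaps/movups (one byte shorter than movdqa/movdqu), and the SSE execution-domain pass converts them back to the integer forms where that is profitable. A small C++ sketch of the kind of source code these patterns cover (helper names are invented; assumes an SSE2-capable compile):

#include <emmintrin.h>

// Aligned 128-bit integer load/store: matched by the alignedload/alignedstore
// patterns above, so the emitted opcode may be movaps rather than movdqa.
__m128i load_ints(const __m128i *p)       { return _mm_load_si128(p); }
void    store_ints(__m128i *p, __m128i v) { _mm_store_si128(p, v); }

// Unaligned variants map to the movups/movdqu patterns.
__m128i loadu_ints(const __m128i *p)       { return _mm_loadu_si128(p); }
void    storeu_ints(__m128i *p, __m128i v) { _mm_storeu_si128(p, v); }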
+let Predicates = [HasAVX] in { + // 128-bit load/store + def : Pat<(alignedloadv4i32 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (VMOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + + // 256-bit load/store + def : Pat<(alignedloadv4i64 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv4i64 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedloadv8i32 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv8i32 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v4i64 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v8i32 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v16i16 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v32i8 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; +} + +// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper +// bits are disregarded. FIXME: Set encoding to pseudo! +let neverHasSideEffects = 1 in { +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; +def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; +def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper +// bits are disregarded. FIXME: Set encoding to pseudo! 
+let canFoldAsLoad = 1, isReMaterializable = 1 in { +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +let isCodeGenOnly = 1 in { + def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX; + def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX; +} +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Low packed FP Instructions +//===----------------------------------------------------------------------===// + multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, PatFrag mov_frag, string base_opc, string asm_opr> { @@ -359,14 +942,10 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, let AddedComplexity = 20 in { defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $dst|$dst, $src2}">; - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $dst|$dst, $src2}">; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -386,6 +965,147 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)]>; +let Predicates = [HasAVX] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS + def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + // vector_shuffle v1, (load v2) <2, 1> using MOVLPS + def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS + def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), + VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + + // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS + def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + + // Shuffle with VMOVLPS + def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPSrm 
VR128:$src1, addr:$src2)>; + + // Shuffle with VMOVLPD + def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE1] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS + def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS + def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), + VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + + // Shuffle with MOVLPS + def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <2, 1> using MOVLPS + def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS + def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + + // Shuffle with MOVLPD + def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; +} + 
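The MOVLPS/MOVLPD pattern groups above are what allow a 64-bit low-half vector load or store to fold its memory operand into a single instruction. Roughly what that looks like through the standard intrinsics (helper names are invented; assumes an SSE2-capable compile):

#include <emmintrin.h>

// Replace the low two floats of v with 64 bits loaded from p: movlps (load form).
__m128 load_low_ps(__m128 v, const __m64 *p) { return _mm_loadl_pi(v, p); }

// Store the low two floats of v: movlps (store form).
void store_low_ps(__m64 *p, __m128 v) { _mm_storel_pi(p, v); }

// Replace the low double of v with *p: movlpd (load form).
__m128d load_low_pd(__m128d v, const double *p) { return _mm_loadl_pd(v, p); }

// Store the low double of v: movlpd (store form).
void store_low_pd(double *p, __m128d v) { _mm_storel_pd(p, v); }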
+//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Hi packed FP Instructions +//===----------------------------------------------------------------------===// + +let AddedComplexity = 20 in { + defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $dst|$dst, $src2}">; +} + // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -411,6 +1131,80 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>; +let Predicates = [HasAVX] in { + // VMOVHPS patterns + def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + + // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // is during lowering, where it's not possible to recognize the load fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; + + // FIXME: This should be matched by a X86Movhpd instead. Same as above + def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (VMOVHPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (VMOVHPDmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE1] in { + // MOVHPS patterns + def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (MOVHPSmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE2] in { + // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // is during lowering, where it's not possible to recognize the load fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + + // FIXME: This should be matched by a X86Movhpd instead. 
Same as above + def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPDmr addr:$dst, VR128:$src)>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions +//===----------------------------------------------------------------------===// + let AddedComplexity = 20 in { def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -438,13 +1232,80 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; } -def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; -let AddedComplexity = 20 in { - def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; - def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; +let Predicates = [HasAVX] in { + // MOVLHPS patterns + let AddedComplexity = 20 in { + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + + // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS + def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + } + def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + + // MOVHLPS patterns + let AddedComplexity = 20 in { + // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS + def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; + + // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS + def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), + (VMOVHLPSrr VR128:$src1, VR128:$src1)>; + def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), + (VMOVHLPSrr VR128:$src1, VR128:$src1)>; + } + + def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE1] in { + // MOVLHPS patterns + let AddedComplexity = 20 in { + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + + // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS + def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + } + def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + + // MOVHLPS patterns + let AddedComplexity = 20 in { + // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS + def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), + 
(MOVHLPSrr VR128:$src1, VR128:$src2)>; + + // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS + def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; + def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; + } + + def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; } //===----------------------------------------------------------------------===// @@ -462,10 +1323,9 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - []>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - []>; + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>; + let mayLoad = 1 in + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -481,36 +1341,39 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; } defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, + VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_W; + VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, + VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, - VEX, VEX_W; + VEX, VEX_W, VEX_LIG; // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. 
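The conversion defs above map fp_to_sint onto the truncating cvttss2si/cvttsd2si forms, and sint_to_fp onto cvtsi2ss/cvtsi2sd, while the comment just above explains why extra "l"/"q" assembly aliases are provided for the defms that follow. In C++ terms these are ordinary scalar casts (a sketch assuming an x86-64 target):

// Truncating float->int conversions (round toward zero, as the language requires).
int       trunc_f32(float x)  { return static_cast<int>(x); }        // cvttss2si
long long trunc_f64(double x) { return static_cast<long long>(x); }  // cvttsd2si ("q" form)

// Integer->float conversions.
float  from_i32(int x)       { return static_cast<float>(x); }       // cvtsi2ss
double from_i64(long long x) { return static_cast<double>(x); }      // cvtsi2sd ("q" form)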
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, - VEX_4V, VEX_W; + VEX_4V, VEX_W, VEX_LIG; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, - VEX_4V, VEX_W; + VEX_4V, VEX_W, VEX_LIG; let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), @@ -579,11 +1442,6 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS, VEX; -defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, - XS, VEX, VEX_W; defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, f128mem, load, "cvtsd2si">, XD, VEX; defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, @@ -594,14 +1452,12 @@ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, // Get rid of this hack or rename the intrinsics, there are several // intructions that only match with the intrinsic form, why create duplicates // to let them be recognized by the assembler? -defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; -defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS; -defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, - f32mem, load, "cvtss2si{q}">, XS, REX_W; + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W, + VEX_LIG; + defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, f128mem, load, "cvtsd2si{l}">, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, @@ -660,10 +1516,11 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, let Pattern = []<dag> in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; + "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, + VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_W; + VEX_W, VEX_LIG; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; @@ -671,6 +1528,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; } + let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS; @@ -681,19 +1539,43 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */ } +let Predicates = [HasSSE1] in { + def : Pat<(int_x86_sse_cvtss2si VR128:$src), + (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), + 
(CVTSS2SIrm addr:$src)>; + def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), + (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), + (CVTSS2SI64rm addr:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_cvtss2si VR128:$src), + (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), + (VCVTSS2SIrm addr:$src)>; + def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), + (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), + (VCVTSS2SI64rm addr:$src)>; +} + /// SSE 2 Only // Convert scalar double to scalar single def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V; + VEX_4V, VEX_LIG; +let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; + []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, - Requires<[HasAVX]>; + Requires<[HasAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", @@ -715,13 +1597,25 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, Requires<[HasAVX]>, VEX_4V; + []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; +let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; -def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, - Requires<[HasAVX]>; + []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + +let Predicates = [HasAVX] in { + def : Pat<(f64 (fextend FR32:$src)), + (VCVTSS2SDrr FR32:$src, FR32:$src)>; + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; +} + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -732,6 +1626,16 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>; +// extload f32 -> f64. This matches load+fextend because we have a hack in +// the isel (PreprocessForFPConvert) that can introduce loads after dag +// combine. +// Since these loads aren't folded into the fextend, we have to match it +// explicitly here. 
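The comment above explains why load+fextend has to be matched explicitly; in source terms it is the common case of widening a float loaded from memory to double. A minimal sketch (helper names are invented): per the patterns just above and below, the load is folded into cvtss2sd when optimizing for size, and kept as a separate movss followed by cvtss2sd when optimizing for speed.

// Widen a float from memory to double: load + fextend, covered by the
// extloadf32/fextend patterns around this hunk.
double widen_from_mem(const float *p) { return static_cast<double>(*p); }

// Register-to-register widening: a plain cvtss2sd.
double widen(float x) { return static_cast<double>(x); }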
+def : Pat<(fextend (loadf32 addr:$src)), + (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>; +def : Pat<(extloadf32 addr:$src), + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -759,10 +1663,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, Requires<[HasSSE2]>; } -def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, - Requires<[HasSSE2, OptForSpeed]>; - // Convert doubleword to packed single/double fp // SSE2 instructions without OpSize prefix def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -862,10 +1762,12 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // SSE2 packed instructions with XS prefix def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +let mayLoad = 1 in def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +let mayLoad = 1 in def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -877,7 +1779,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; - def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -889,16 +1790,33 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; -def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>, - VEX; -def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>, VEX; +let Predicates = [HasSSE2] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_CVTDQ2PSrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (CVTTPS2DQrr VR128:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (VCVTTPS2DQrr VR128:$src)>; + def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))), + (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), + (VCVTTPS2DQYrr VR256:$src)>; +} + +def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX; +let isCodeGenOnly = 1 in +def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>, VEX; def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; @@ -910,8 +1828,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs 
VR128:$dst),(ins f128mem:$src), // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; @@ -931,13 +1847,13 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; @@ -947,12 +1863,12 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, - VEX, Requires<[HasAVX]>; + TB, VEX, Requires<[HasAVX]>; def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, - VEX, Requires<[HasAVX]>; + TB, VEX, Requires<[HasAVX]>; def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1038,75 +1954,61 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src), def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), (VCVTTPS2DQYrm addr:$src)>; +// Match fround and fextend for 128/256-bit conversions +def : Pat<(v4f32 (fround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; +def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + +def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), + (VCVTPS2PDYrr VR128:$src)>; +def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + (VCVTPS2PDYrm addr:$src)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, + SDNode OpNode, ValueType VT, PatFrag ld_frag, string asm, string asm_alt> { - let isAsmParserOnly = 1 in { - def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), - asm, []>; - let mayLoad = 1 in - def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), - asm, []>; - } + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), 
asm, + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>; + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, + [(set RC:$dst, (OpNode (VT RC:$src1), + (ld_frag addr:$src2), imm:$cc))]>; // Accept explicit immediate argument form instead of comparison code. - def rr_alt : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, []>; - let mayLoad = 1 in - def rm_alt : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), - asm_alt, []>; + let neverHasSideEffects = 1 in { + def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>; + } } -let neverHasSideEffects = 1 in { - defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, - "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, - XS, VEX_4V; - defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, - "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, - XD, VEX_4V; -} +defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, + "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + XS, VEX_4V, VEX_LIG; +defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, + "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { -def CMPSSrr : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc), + defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS; -def CMPSSrm : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc), - "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS; -def CMPSDrr : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc), - "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD; -def CMPSDrm : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc), + "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">, + XS; + defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD; -} -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { -def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; -def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; + "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">, 
+ XD; } multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, @@ -1151,25 +2053,28 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, VEX; + "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, OpSize, VEX; + "ucomisd", SSEPackedDouble>, TB, OpSize, VEX, + VEX_LIG; let Pattern = []<dag> in { defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, VEX; + "comiss", SSEPackedSingle>, TB, VEX, + VEX_LIG; defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, OpSize, VEX; + "comisd", SSEPackedDouble>, TB, OpSize, VEX, + VEX_LIG; } defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, VEX; + load, "ucomiss", SSEPackedSingle>, TB, VEX; defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX; defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss", SSEPackedSingle>, VEX; + load, "comiss", SSEPackedSingle>, TB, VEX; defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd", SSEPackedDouble>, OpSize, VEX; + load, "comisd", SSEPackedDouble>, TB, OpSize, VEX; defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, @@ -1199,57 +2104,82 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, Domain d> { let isAsmParserOnly = 1 in { def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>; def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>; } // Accept explicit immediate argument form instead of comparison code. 
def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc), asm_alt, [], d>; } defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; + "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedSingle>, TB, VEX_4V; defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; + "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedSingle>, TB, VEX_4V; defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", + "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", + "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedSingle>, TB; defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", + "cmp${cc}pd\t{$src2, $dst|$dst, $src2}", + "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedDouble>, TB, OpSize; } +let Predicates = [HasSSE1] in { def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +} + +let Predicates = [HasSSE2] in { def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; +} + +let Predicates = [HasAVX] in { +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPDrmi 
VR128:$src1, addr:$src2, imm:$cc)>; + +def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), + (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>; +def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), + (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>; +def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; +} //===----------------------------------------------------------------------===// // SSE 1 & 2 - Shuffle Instructions @@ -1293,6 +2223,132 @@ let Constraints = "$src1 = $dst" in { memopv2f64, SSEPackedDouble>, TB, OpSize; } +let Predicates = [HasSSE1] in { + def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but + // fall back to this for SSE1) + def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPSrri case. + def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; +} + +let Predicates = [HasSSE2] in { + // Special binary v4i32 shuffle cases with SHUFPS. + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPDrri cases. + def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v2i64 shuffle cases using SHUFPDrri. 
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Generic SHUFPD patterns + def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but + // fall back to this for SSE1) + def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (VSHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPSrri case. + def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v4i32 shuffle cases with SHUFPS. + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (VSHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (VSHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPDrri cases. + def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v2i64 shuffle cases using SHUFPDrri. 
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (VSHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + + def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + + // 256-bit patterns + def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v8i32 (X86Shufps VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v8f32 (X86Shufps VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v4i64 (X86Shufpd VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v4f64 (X86Shufpd VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Unpack Instructions //===----------------------------------------------------------------------===// @@ -1316,29 +2372,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, let AddedComplexity = 10 in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, 
$dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, @@ -1356,6 +2412,103 @@ let AddedComplexity = 10 in { } // Constraints = "$src1 = $dst" } // AddedComplexity +let Predicates = [HasSSE1] in { + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (UNPCKLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (UNPCKHPSrr VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (UNPCKLPDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (UNPCKHPDrr VR128:$src1, VR128:$src2)>; + + // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // problem is during lowering, where it's not possible to recognize the load + // fold cause it has two uses through a bitcast. One use disappears at isel + // time and the fold opportunity reappears. + def : Pat<(v2f64 (X86Movddup VR128:$src)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; + + let AddedComplexity = 10 in + def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; + + def : 
Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; + + // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // problem is during lowering, where it's not possible to recognize the load + // fold cause it has two uses through a bitcast. One use disappears at isel + // time and the fold opportunity reappears. + def : Pat<(v2f64 (X86Movddup VR128:$src)), + (VUNPCKLPDrr VR128:$src, VR128:$src)>; + let AddedComplexity = 10 in + def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), + (VUNPCKLPDrr VR128:$src, VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Extract Floating-Point Sign mask //===----------------------------------------------------------------------===// @@ -1370,91 +2523,60 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; } -// Mask creation -defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, - "movmskps", SSEPackedSingle>, VEX; -defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; -defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, - "movmskps", SSEPackedSingle>, VEX; -defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", SSEPackedSingle>, TB; defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; -// X86fgetsign -def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize; -def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize; -def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB; -def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB; - -// Assembler Only -def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; -def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; -def VMOVMSKPSYr64r : PI<0x50, 
MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; -def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; - -//===----------------------------------------------------------------------===// -// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions -//===----------------------------------------------------------------------===// - -// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have -// names that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in { - // FIXME: Set encoding to pseudo! -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; -def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; -def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; -} - -// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper -// bits are disregarded. -let neverHasSideEffects = 1 in { -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; -} +def : Pat<(i32 (X86fgetsign FR32:$src)), + (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i64 (X86fgetsign FR32:$src)), + (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i32 (X86fgetsign FR64:$src)), + (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; +def : Pat<(i64 (X86fgetsign FR64:$src)), + (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; -// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper -// bits are disregarded. 
-let canFoldAsLoad = 1, isReMaterializable = 1 in { -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +let Predicates = [HasAVX] in { + defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, + "movmskps", SSEPackedSingle>, TB, VEX; + defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, + "movmskpd", SSEPackedDouble>, TB, + OpSize, VEX; + defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, + "movmskps", SSEPackedSingle>, TB, VEX; + defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, + "movmskpd", SSEPackedDouble>, TB, + OpSize, VEX; + + def : Pat<(i32 (X86fgetsign FR32:$src)), + (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>; + def : Pat<(i64 (X86fgetsign FR32:$src)), + (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>; + def : Pat<(i32 (X86fgetsign FR64:$src)), + (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>; + def : Pat<(i64 (X86fgetsign FR64:$src)), + (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>; + + // Assembler Only + def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + OpSize, VEX; + def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + OpSize, VEX; } //===----------------------------------------------------------------------===// @@ -1466,10 +2588,10 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, - FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, - FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, @@ -1494,21 +2616,22 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { - let Pattern = []<dag> in { - defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f128mem, - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))], - [(set VR128:$dst, 
(OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, - OpSize, VEX_4V; - } + // In AVX no need to add a pattern for 128-bit logical rr ps, because they + // are all promoted to v2i64, and the patterns are covered by the int + // version. This is needed in SSE only, because v2i64 isn't supported on + // SSE1, but only on SSE2. + defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, [], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, @@ -1533,7 +2656,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "ps"), f256mem, [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))], 0>, VEX_4V; + (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, @@ -1541,7 +2664,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, (bc_v4i64 (v4f64 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), (memopv4i64 addr:$src2)))], 0>, - OpSize, VEX_4V; + TB, OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms @@ -1632,32 +2755,32 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { // Binary Arithmetic instructions defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_s_int<0x58, "add", 0>, - basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG; +defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_s_int<0x59, "mul", 0>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG; +defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG; + defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_s_int<0x5E, "div", 0>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG; + defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_s_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG; + defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, 
basic_sse12_fp_binop_p_int<0x5F, "max", 0>, basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_s_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG; + defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, basic_sse12_fp_binop_p_int<0x5D, "min", 0>, basic_sse12_fp_binop_p_y_int<0x5D, "min">, basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; @@ -1720,23 +2843,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int> { +multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, XS, Requires<[HasAVX, OptForSize]>; - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + let mayLoad = 1 in + def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat(OpcodeStr, - "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))]>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins ssmem:$src1, VR128:$src2), !strconcat(OpcodeStr, - "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -1801,21 +2919,17 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. -multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F64Int> { +multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), + def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))]>; - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -1863,9 +2977,8 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { // Square root. 
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, - sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, - VEX_4V; + defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, + sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, @@ -1879,21 +2992,76 @@ let Predicates = [HasAVX] in { // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt, - int_x86_sse_rsqrt_ss>, VEX_4V; + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>, sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>, - VEX_4V; + defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>, sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX; } +def : Pat<(f32 (fsqrt FR32:$src)), + (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; +def : Pat<(f64 (fsqrt FR64:$src)), + (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>; +def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +def : Pat<(f32 (X86frsqrt FR32:$src)), + (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +def : Pat<(f32 (X86frcp FR32:$src)), + (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_sqrt_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VSQRTSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), + (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; + + def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), + (VSQRTSDr (f64 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), + sub_sd)>; + def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), + (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; + + def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), + (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; + + def : Pat<(int_x86_sse_rcp_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VRCPSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), + (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +} + // Square root. 
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, sse1_fp_unop_p<0x51, "sqrt", fsqrt>, @@ -1992,7 +3160,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>; + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), @@ -2006,7 +3174,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } //===----------------------------------------------------------------------===// -// SSE 1 & 2 - Misc Instructions (No AVX form) +// SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. @@ -2019,66 +3187,26 @@ def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; -// Load, store, and memory fence -def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, - TB, Requires<[HasSSE1]>; -def : Pat<(X86SFence), (SFENCE)>; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, it does not expand the instructions below like -// X86MCInstLower does. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4f32 immAllZerosV))]>; -def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v2f64 immAllZerosV))]>; -let ExeDomain = SSEPackedInt in -def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; -} - -// The same as done above but for AVX. The 128-bit versions are the -// same, but re-encoded. The 256-bit does not support PI version, and -// doesn't need it because on sandy bridge the register is set to zero -// at the rename stage without using any execution unit, so SET0PSY -// and SET0PDY can be used for vector int instructions without penalty -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementatioan, it does not expand the instructions below like -// X86MCInstLower does. 
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, Predicates = [HasAVX] in { -def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V; -def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; -let ExeDomain = SSEPackedInt in -def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; -} +// Flush cache +def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), + "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, + TB, Requires<[HasSSE2]>; -def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; -def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; -def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; +// Pause. This "instruction" is encoded as "rep; nop", so even though it +// was introduced with SSE2, it's backward compatible. +def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; -def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// Load, store, and memory fence +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), + "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), + "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), + "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; -// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while -// in the non-AVX version bits 127:64 aren't touched. Find a better way to -// represent this instead of always zeroing SRC1. One possible solution is -// to represent the instruction w/ something similar as the "$src1 = $dst" -// constraint but without the tied operands. 
-def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, - Requires<[HasAVX, OptForSpeed]>; +def : Pat<(X86SFence), (SFENCE)>; +def : Pat<(X86LFence), (LFENCE)>; +def : Pat<(X86MFence), (MFENCE)>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register @@ -2106,10 +3234,22 @@ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; } -def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; -def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; +def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; + +// For Disassembler +let isCodeGenOnly = 1 in { +def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +} let canFoldAsLoad = 1, mayLoad = 1 in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -2147,6 +3287,16 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", []>, XS, Requires<[HasSSE2]>; +// For Disassembler +let isCodeGenOnly = 1 in { +def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>; + +def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + []>, XS, Requires<[HasSSE2]>; +} + let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", @@ -2180,9 +3330,11 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // ExeDomain = SSEPackedInt -def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; -def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), - (VMOVDQUYmr addr:$dst, VR256:$src)>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; + def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), + (VMOVDQUYmr addr:$dst, VR256:$src)>; +} //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Arithmetic Instructions @@ -2415,15 +3567,14 @@ let ExeDomain = SSEPackedInt in { def VPANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>, VEX_4V; + [(set VR128:$dst, + (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V; def VPANDNrm : PDI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - 
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>, - VEX_4V; + [(set VR128:$dst, (X86andnp VR128:$src1, + (memopv2i64 addr:$src2)))]>, VEX_4V; } } @@ -2527,6 +3678,32 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0, 0>, VEX_4V; + + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), + (VPCMPEQBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + (VPCMPEQBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), + (VPCMPEQWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + (VPCMPEQWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), + (VPCMPEQDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + (VPCMPEQDrm VR128:$src1, addr:$src2)>; + + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), + (VPCMPGTBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + (VPCMPGTBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), + (VPCMPGTWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + (VPCMPGTWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), + (VPCMPGTDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + (VPCMPGTDrm VR128:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -2538,31 +3715,33 @@ let Constraints = "$src1 = $dst" in { defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; } // Constraints = "$src1 = $dst" -def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), - (PCMPEQBrr VR128:$src1, VR128:$src2)>; -def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), - (PCMPEQBrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), - (PCMPEQWrr VR128:$src1, VR128:$src2)>; -def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), - (PCMPEQWrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), - (PCMPEQDrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), - (PCMPEQDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), - (PCMPGTBrr VR128:$src1, VR128:$src2)>; -def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), - (PCMPGTBrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), - (PCMPGTWrr VR128:$src1, VR128:$src2)>; -def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), - (PCMPGTWrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), - (PCMPGTDrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), - (PCMPGTDrm VR128:$src1, addr:$src2)>; +let Predicates = [HasSSE2] in { + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), + (PCMPEQBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + (PCMPEQBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), + (PCMPEQWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + (PCMPEQWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), + (PCMPEQDrr VR128:$src1, 
VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + (PCMPEQDrm VR128:$src1, addr:$src2)>; + + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), + (PCMPGTBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + (PCMPGTBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), + (PCMPGTWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + (PCMPGTWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), + (PCMPGTDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + (PCMPGTDrm VR128:$src1, addr:$src2)>; +} //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions @@ -2608,7 +3787,7 @@ def mi : Ii8<0x70, MRMSrcMem, let Predicates = [HasAVX] in { let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, + defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize, VEX; // SSE2 with ImmT == Imm8 and XS prefix. @@ -2618,6 +3797,34 @@ let Predicates = [HasAVX] in { // SSE2 with ImmT == Imm8 and XD prefix. defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, VEX; + + let AddedComplexity = 5 in + def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + // Unary v4f32 shuffle with VPSHUF* in order to fold a load. + def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (VPSHUFHWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (VPSHUFHWmi addr:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (VPSHUFLWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (VPSHUFLWmi addr:$src, imm:$imm)>; } let Predicates = [HasSSE2] in { @@ -2629,6 +3836,34 @@ let Predicates = [HasSSE2] in { // SSE2 with ImmT == Imm8 and XD prefix. defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; + + let AddedComplexity = 5 in + def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + // Unary v4f32 shuffle with PSHUF* in order to fold a load. 
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (PSHUFHWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (PSHUFLWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -2637,71 +3872,69 @@ let Predicates = [HasSSE2] in { let ExeDomain = SSEPackedInt in { multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, - PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> { + SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; + [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (unp_frag VR128:$src1, + [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))]>; } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, - 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, - 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32, - 0>, VEX_4V; + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, + bc_v16i8, 0>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, + bc_v8i16, 0>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, + bc_v4i32, 0>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. 
def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, + VR128:$src2)))]>, VEX_4V; def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; - - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8, - 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16, - 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32, - 0>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, + bc_v16i8, 0>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, + bc_v8i16, 0>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, + bc_v4i32, 0>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, + VR128:$src2)))]>, VEX_4V; def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>; + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. 
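Editorial note (not part of the patch): the X86Punpcklqdq / X86Punpckhqdq nodes used above interleave the 64-bit halves of their two operands, so unpacking a register with itself splats one quadword lane; the splat_lo pattern added in the next hunk relies on exactly that. A minimal C++ sketch of the same behaviour via the SSE2 intrinsics, assuming <immintrin.h> and an SSE2-capable target:

#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128i v = _mm_set_epi64x(0x2222222222222222LL, 0x1111111111111111LL);
  __m128i lo = _mm_unpacklo_epi64(v, v);  // punpcklqdq v, v -> {low, low}
  __m128i hi = _mm_unpackhi_epi64(v, v);  // punpckhqdq v, v -> {high, high}

  std::int64_t out[2];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(out), lo);
  std::printf("splat lo: %016llx %016llx\n",
              (unsigned long long)out[0], (unsigned long long)out[1]);
  _mm_storeu_si128(reinterpret_cast<__m128i *>(out), hi);
  std::printf("splat hi: %016llx %016llx\n",
              (unsigned long long)out[0], (unsigned long long)out[1]);
  return 0;
}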
@@ -2709,17 +3942,17 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, + (v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))))]>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>; + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. @@ -2727,17 +3960,24 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, + (v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))))]>; } - } // ExeDomain = SSEPackedInt +// Splat v2f64 / v2i64 +let AddedComplexity = 10 in { + def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), + (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), + (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>; +} + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -2769,7 +4009,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2778,11 +4018,11 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, // Insert let Predicates = [HasAVX] in { - defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; + defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in @@ -2839,7 +4079,9 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), // SSE2 - Move Doubleword //===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Int Doubleword to Packed Double Int +// def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2849,6 +4091,14 @@ 
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, VEX; +def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector GR64:$src)))]>, VEX; +def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX; + def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2865,8 +4115,9 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>; - +//===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar +// def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; @@ -2883,7 +4134,9 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; +//===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int +// def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2902,22 +4155,48 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; -def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), +//===---------------------------------------------------------------------===// +// Move Packed Doubleword Int first element to Doubleword Int +// +def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>, + TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + +def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>; + +//===---------------------------------------------------------------------===// +// Bitcast FR64 <-> GR64 +// +let Predicates = [HasAVX] in +def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, + VEX; +def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>; + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; +def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, 
FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; -def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; -def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; - +//===---------------------------------------------------------------------===// // Move Scalar Single to Double Int +// def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; @@ -2931,7 +4210,9 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; -// movd / movq to XMM register zero-extends +//===---------------------------------------------------------------------===// +// Patterns and instructions to describe movd/movq to XMM register zero-extends +// let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -2967,15 +4248,36 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>; +} -def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), (MOVZDI2PDIrm addr:$src)>; -def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; -def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; } +let Predicates = [HasAVX] in { + // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. + let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), + (VMOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (VMOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (VMOVZDI2PDIrm addr:$src)>; + } + // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), + (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; +} + // These are the correct encodings of the instructions so that we know how to // read correct assembly, even though we continue to emit the wrong ones for // compatibility with Darwin's buggy assembler. 
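Editorial note (not part of the patch): the X86vzmovl patterns above encode the zero-extending behaviour of movd/movq into an XMM register, i.e. the scalar lands in element 0 and every higher element is cleared, and the SUBREG_TO_REG patterns extend the same guarantee to the upper half of a YMM register under AVX. A rough C++ equivalent of the 128-bit cases using the standard SSE2 intrinsics, assuming <immintrin.h>:

#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  // movd r32 -> xmm: low 32 bits written, bits 127:32 zeroed.
  __m128i d = _mm_cvtsi32_si128(0x12345678);

  // movq m64 -> xmm: low 64 bits loaded, bits 127:64 zeroed.
  std::int64_t mem = 0x0123456789abcdefLL;
  __m128i q = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&mem));

  std::uint32_t d4[4];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(d4), d);
  std::printf("movd: %08x %08x %08x %08x\n", d4[0], d4[1], d4[2], d4[3]);

  std::uint64_t q2[2];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(q2), q);
  std::printf("movq: %016llx %016llx\n",
              (unsigned long long)q2[0], (unsigned long long)q2[1]);
  return 0;
}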
@@ -2996,7 +4298,9 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", // SSE2 - Move Quadword //===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int +// def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3008,7 +4312,9 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix +//===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int +// def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -3018,10 +4324,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - +//===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. +// def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; @@ -3037,7 +4342,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (loadi64 addr:$src))))))]>, XS, VEX, Requires<[HasAVX]>; -let AddedComplexity = 20 in { +let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3045,15 +4350,27 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (loadi64 addr:$src))))))]>, XS, Requires<[HasSSE2]>; -def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; -def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), (MOVZQI2PQIrm addr:$src)>; -def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; +} + +let Predicates = [HasAVX], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), + (VMOVZQI2PQIrm addr:$src)>; } +//===---------------------------------------------------------------------===// // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
+// let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -3077,9 +4394,21 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))]>, XS, Requires<[HasSSE2]>; +} -def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), - (MOVZPQILo2PQIrm addr:$src)>; +let AddedComplexity = 20 in { + let Predicates = [HasSSE2] in { + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (MOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (MOVZPQILo2PQIrr VR128:$src)>; + } + let Predicates = [HasAVX] in { + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (VMOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (VMOVZPQILo2PQIrr VR128:$src)>; + } } // Instructions to match in the assembler @@ -3102,37 +4431,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, XS; //===---------------------------------------------------------------------===// -// SSE2 - Misc Instructions -//===---------------------------------------------------------------------===// - -// Flush cache -def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasSSE2]>; - -// Load, store, and memory fence -def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; -def : Pat<(X86LFence), (LFENCE)>; -def : Pat<(X86MFence), (MFENCE)>; - - -// Pause. This "instruction" is encoded as "rep; nop", so even though it -// was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-ones value if folding it would be beneficial. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in - // FIXME: Change encoding to pseudo. 
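Editorial note (not part of the patch): the SSE2 misc definitions removed from this location (clflush, lfence/mfence, pause, and the all-ones idiom) back the usual compiler intrinsics; a trivial C++ usage sketch, assuming <immintrin.h>:

#include <immintrin.h>

int shared = 0;

int main() {
  shared = 42;
  _mm_clflush(&shared);  // clflush: evict the cache line holding 'shared'
  _mm_mfence();          // mfence: full load/store ordering fence
  _mm_lfence();          // lfence: load ordering fence
  _mm_pause();           // pause: spin-wait hint, encoded as "rep; nop"
  return shared;
}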
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; - -//===---------------------------------------------------------------------===// // SSE3 - Conversion Instructions //===---------------------------------------------------------------------===// @@ -3164,6 +4462,11 @@ def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTPD2DQYrr VR256:$src)>; +def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTPD2DQYrm addr:$src)>; + // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -3192,41 +4495,74 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), (VCVTPD2DQYrm addr:$src)>; +def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; +def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))), + (VCVTDQ2PDYrm addr:$src)>; + //===---------------------------------------------------------------------===// -// SSE3 - Move Instructions +// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// - -// Replicate Single FP -multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> { -def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), +multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, + ValueType vt, RegisterClass RC, PatFrag mem_frag, + X86MemOperand x86memop> { +def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (rep_frag - VR128:$src, (undef))))]>; -def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + [(set RC:$dst, (vt (OpNode RC:$src)))]>; +def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (rep_frag - (memopv4f32 addr:$src), (undef)))]>; + [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>; } -multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag, - string OpcodeStr> { -def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; -def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +let Predicates = [HasAVX] in { + defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v8f32, VR256, memopv8f32, f256mem>, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v8f32, VR256, memopv8f32, f256mem>, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, + memopv4f32, f128mem>; +defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, + memopv4f32, f128mem>; + +let Predicates = [HasSSE3] in { + def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSHDUPrm addr:$src)>; + def : 
Pat<(v4i32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSLDUPrm addr:$src)>; } let Predicates = [HasAVX] in { - // FIXME: Merge above classes when we have patterns for the ymm version - defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; - defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; - defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX; - defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX; + def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (VMOVSHDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), + (VMOVSHDUPrm addr:$src)>; + def : Pat<(v4i32 (X86Movsldup VR128:$src)), + (VMOVSLDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), + (VMOVSLDUPrm addr:$src)>; + def : Pat<(v8i32 (X86Movshdup VR256:$src)), + (VMOVSHDUPYrr VR256:$src)>; + def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))), + (VMOVSHDUPYrm addr:$src)>; + def : Pat<(v8i32 (X86Movsldup VR256:$src)), + (VMOVSLDUPYrr VR256:$src)>; + def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))), + (VMOVSLDUPYrm addr:$src)>; } -defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; -defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; -// Replicate Double FP +//===---------------------------------------------------------------------===// +// SSE3 - Replicate Double FP - MOVDDUP +//===---------------------------------------------------------------------===// + multiclass sse3_replicate_dfp<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3238,23 +4574,90 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), (undef))))]>; } +// FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { -def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>; -def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>; +let Predicates = [HasAVX] in { + def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; + def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; + } +} + +defm MOVDDUP : sse3_replicate_dfp<"movddup">; +defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; +defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; + +let Predicates = [HasSSE3] in { + def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), + (MOVDDUPrm addr:$src)>; + let AddedComplexity = 5 in { + def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>; + } + def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup 
(v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; } let Predicates = [HasAVX] in { - // FIXME: Merge above classes when we have patterns for the ymm version - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; - defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; + def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), + (VMOVDDUPrm addr:$src)>; + let AddedComplexity = 5 in { + def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), + (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), + (VMOVDDUPrm addr:$src)>; + } + def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + + // 256-bit version + def : Pat<(X86Movddup (memopv4f64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (memopv4i64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4f64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; + def : Pat<(X86Movddup (v4i64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; } -defm MOVDDUP : sse3_replicate_dfp<"movddup">; -// Move Unaligned Integer +//===---------------------------------------------------------------------===// +// SSE3 - Move Unaligned Integer +//===---------------------------------------------------------------------===// + let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -3267,38 +4670,6 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; -def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; - -// Several Move patterns -let AddedComplexity = 5 in { -def : Pat<(movddup (memopv2f64 addr:$src), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (memopv2i64 addr:$src), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -} - -// vector_shuffle v1, <undef> <1, 1, 3, 3> -let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), - (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; - 
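Editorial note (not part of the patch): the movddup/movshdup/movsldup handling being reorganised here replicates lanes within a single vector: movshdup copies the odd elements (<1,1,3,3>), movsldup the even elements (<0,0,2,2>), and movddup the low double, which is what the old vector_shuffle patterns nearby spelled out. A small C++ sketch with the SSE3 intrinsics, assuming <immintrin.h> and an SSE3-enabled build:

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128  s = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);
  __m128d d = _mm_setr_pd(10.0, 20.0);

  __m128  hdup = _mm_movehdup_ps(s);  // movshdup: {1, 1, 3, 3}
  __m128  ldup = _mm_moveldup_ps(s);  // movsldup: {0, 0, 2, 2}
  __m128d ddup = _mm_movedup_pd(d);   // movddup:  {10, 10}

  float f[4];
  _mm_storeu_ps(f, hdup);
  std::printf("movshdup: %g %g %g %g\n", f[0], f[1], f[2], f[3]);
  _mm_storeu_ps(f, ldup);
  std::printf("movsldup: %g %g %g %g\n", f[0], f[1], f[2], f[3]);
  double g[2];
  _mm_storeu_pd(g, ddup);
  std::printf("movddup:  %g %g\n", g[0], g[1]);
  return 0;
}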
-// vector_shuffle v1, <undef> <0, 0, 2, 2> -let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), - (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; - //===---------------------------------------------------------------------===// // SSE3 - Arithmetic //===---------------------------------------------------------------------===// @@ -3344,62 +4715,58 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3], // Horizontal ops multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; } let Predicates = [HasAVX] in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - int_x86_sse3_hadd_ps, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - int_x86_sse3_hadd_pd, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - int_x86_sse3_hsub_ps, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - int_x86_sse3_hsub_pd, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - int_x86_avx_hadd_ps_256, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - int_x86_avx_hadd_pd_256, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - int_x86_avx_hsub_ps_256, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - int_x86_avx_hsub_pd_256, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, - int_x86_sse3_hadd_ps>; - defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, - 
int_x86_sse3_hadd_pd>; - defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, - int_x86_sse3_hsub_ps>; - defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, - int_x86_sse3_hsub_pd>; + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; } //===---------------------------------------------------------------------===// @@ -3466,7 +4833,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in { +let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; @@ -3525,17 +4892,33 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128>; } -def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; -def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +let Predicates = [HasSSSE3] in { + def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>; + def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>; + + def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (PSIGNBrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (PSIGNWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (PSIGNDrr128 VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (VPSHUFBrr128 VR128:$src, VR128:$mask)>; + def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (VPSHUFBrm128 VR128:$src, addr:$mask)>; -def : Pat<(X86psignb VR128:$src1, VR128:$src2), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; -def : Pat<(X86psignw VR128:$src1, VR128:$src2), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; -def : Pat<(X86psignd VR128:$src1, VR128:$src2), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; + def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; +} //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns @@ -3560,33 +4943,35 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; -let Constraints = "$src1 = $dst" in +let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : ssse3_palign<"palignr">; -let AddedComplexity = 5 in { -def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, 
VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; +let Predicates = [HasSSSE3] in { +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +} + +let Predicates = [HasAVX] in { +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// -// SSSE3 Misc Instructions +// SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// -// Thread synchronization let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; @@ -3609,338 +4994,6 @@ def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, Requires<[In64BitMode]>; -//===---------------------------------------------------------------------===// -// Non-Instruction Patterns -//===---------------------------------------------------------------------===// - -// extload f32 -> f64. This matches load+fextend because we have a hack in -// the isel (PreprocessForFPConvert) that can introduce loads after dag -// combine. -// Since these loads aren't folded into the fextend, we have to match it -// explicitly here. -let Predicates = [HasSSE2] in - def : Pat<(fextend (loadf32 addr:$src)), - (CVTSS2SDrm addr:$src)>; - -// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while -// in the non-AVX version bits 127:64 aren't touched. Find a better way to -// represent this instead of always zeroing SRC1. One possible solution is -// to represent the instruction w/ something similar as the "$src1 = $dst" -// constraint but without the tied operands. 
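Editorial note (not part of the patch): the FIXME above concerns the scalar float-to-double convert; in the three-operand AVX form DEST[127:64] is taken from SRC1, while the legacy two-operand form leaves the destination's upper half untouched, and at the source level the intrinsic exposes that as an explicit pass-through operand. A small C++ sketch, assuming <immintrin.h>:

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128d passthru = _mm_setr_pd(1.5, 99.0);            // element 1 survives
  __m128  src      = _mm_setr_ps(3.25f, 0.f, 0.f, 0.f);

  // cvtss2sd: result[0] = (double)src[0], result[1] = passthru[1]
  __m128d r = _mm_cvtss_sd(passthru, src);

  double out[2];
  _mm_storeu_pd(out, r);
  std::printf("%g %g\n", out[0], out[1]);  // prints: 3.25 99
  return 0;
}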
-let Predicates = [HasAVX] in - def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), - addr:$src)>; - -// bit_convert -let Predicates = [HasXMMInt] in { - def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; -} - -let Predicates = [HasAVX] in { - def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; -} - -// Move scalar to XMM zero-extended -// movd to XMM 
register zero-extends -let AddedComplexity = 15 in { -// Zeroing a VR128 then do a MOVS{S|D} to the lower bits. -def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>; -def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; -def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0PS)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; -def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0PI)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; -} - -// Splat v2f64 / v2i64 -let AddedComplexity = 10 in { -def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), - (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2f64 VR128:$src), (undef)), - (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), - (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2i64 VR128:$src), (undef)), - (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -} - -// Special unary SHUFPSrri case. -def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; -let AddedComplexity = 5 in -def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; - -// Special binary v4i32 shuffle cases with SHUFPS. -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Special binary v2i64 shuffle cases using SHUFPDrri. 
-def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; - -// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -} -let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>; -def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>; -def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>; -def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>; -} - -// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -} -let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>; -def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>; -def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>; -def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>; -} - -let AddedComplexity = 20 in { -// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; - -// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; - -// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; -} - -let AddedComplexity = 20 in { -// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -} - -// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; - -let AddedComplexity = 15 in { -// Setting the lowest element in the vector. 
-def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; -def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - -// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd -def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, - Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, - Requires<[HasSSE2]>; -} - -// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but -// fall back to this for SSE1) -def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - -// Set lowest element and zero upper elements. -def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), - (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; - -// vector -> vector casts -def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; - -// Use movaps / movups for SSE integer load / store (one byte shorter). -// The instructions selected below are then converted to MOVDQA/MOVDQU -// during the SSE domain pass. -let Predicates = [HasSSE1] in { - def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -} - -// Use vmovaps/vmovups for AVX integer load/store. 
-let Predicates = [HasAVX] in { - // 128-bit load/store - def : Pat<(alignedloadv4i32 addr:$src), - (VMOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (VMOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (VMOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (VMOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - - // 256-bit load/store - def : Pat<(alignedloadv4i64 addr:$src), - (VMOVAPSYrm addr:$src)>; - def : Pat<(loadv4i64 addr:$src), - (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedloadv8i32 addr:$src), - (VMOVAPSYrm addr:$src)>; - def : Pat<(loadv8i32 addr:$src), - (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), - (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), - (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(store (v4i64 VR256:$src), addr:$dst), - (VMOVUPSYmr addr:$dst, VR256:$src)>; - def : Pat<(store (v8i32 VR256:$src), addr:$dst), - (VMOVUPSYmr addr:$dst, VR256:$src)>; -} - //===----------------------------------------------------------------------===// // SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// @@ -3979,36 +5032,71 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; -// Common patterns involving scalar load. -def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load. 
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>; +} + +let Predicates = [HasAVX] in { + // Common patterns involving scalar load. 
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (VPMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (VPMOVSXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (VPMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (VPMOVSXWDrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (VPMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (VPMOVSXDQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (VPMOVZXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (VPMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (VPMOVZXWDrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (VPMOVZXDQrm addr:$src)>; +} multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { @@ -4039,17 +5127,31 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (PMOVSXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (PMOVSXWQrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (PMOVZXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (PMOVZXWQrm addr:$src)>; +} -def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasAVX] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (VPMOVSXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (VPMOVSXWQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (VPMOVZXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (VPMOVZXWQrm addr:$src)>; +} multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -4073,16 +5175,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, 
"pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXBQrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBQrm addr:$src)>; +} + +let Predicates = [HasAVX] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXBQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXBQrm addr:$src)>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Extract Instructions @@ -4208,7 +5325,12 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + Requires<[HasSSE41]>; +def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), + imm:$src2))), + addr:$dst), + (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasAVX]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -4297,7 +5419,7 @@ let Constraints = "$src1 = $dst" in // in the target vector. 
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + (ins VR128:$src1, VR128:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -4306,7 +5428,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), + (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -4348,7 +5470,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, // Vector intrinsic operation, mem def PSm : Ii8<opcps, MRMSrcMem, - (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, @@ -4366,7 +5488,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, // Vector intrinsic operation, mem def PDm : SS4AIi8<opcpd, MRMSrcMem, - (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, @@ -4501,14 +5623,14 @@ let Predicates = [HasAVX] in { int_x86_avx_round_pd_256>, VEX; defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, - int_x86_sse41_round_sd, 0>, VEX_4V; + int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; // Instructions for the assembler defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, VEX; defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V; + defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -4578,26 +5700,34 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// -def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS; -def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS; - -def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop GR32:$src))]>, XS; -def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS; - -def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop GR64:$src))]>, XS; -def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS; +let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { + def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, + 
OpSize, XS; + def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop (loadi16 addr:$src))), + (implicit EFLAGS)]>, OpSize, XS; + + def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, + XS; + def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, + XS; + def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} @@ -4666,6 +5796,11 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (VPCMPEQQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (VPCMPEQQrm VR128:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -4720,7 +5855,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, i32i8imm:$src3), + (ins RC:$src1, RC:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -4729,7 +5864,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, OpSize; def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, i32i8imm:$src3), + (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -4815,6 +5950,36 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; +let Predicates = [HasAVX] in { + def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1), + (v4i32 VR128:$src2))), + (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1), + (v4f32 VR128:$src2))), + (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1), + (v2i64 VR128:$src2))), + (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1), + (v2f64 VR128:$src2))), + (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1), + (v8i32 VR256:$src2))), + (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1), + (v8f32 VR256:$src2))), + (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1), + (v4i64 VR256:$src2))), + (VBLENDVPDYrr VR256:$src2, 
VR256:$src1, VR256:$mask)>; + def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), + (v4f64 VR256:$src2))), + (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; +} + /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { @@ -4835,12 +6000,27 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { } } -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; - -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), - (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; + +let Predicates = [HasSSE41] in { + def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1), + (v4i32 VR128:$src2))), + (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1), + (v4f32 VR128:$src2))), + (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1), + (v2i64 VR128:$src2))), + (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), + (v2f64 VR128:$src2))), + (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; +} let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4876,9 +6056,16 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX] in { defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), + (VPCMPGTQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), + (VPCMPGTQrm VR128:$src1, addr:$src2)>; +} + let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; @@ -5158,22 +6345,43 @@ let Constraints = "$src1 = $dst" in { int_x86_aesni_aesdeclast>; } -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (AESENCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (AESENCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (AESENCLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (AESENCLASTrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (AESDECrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (AESDECrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (AESDECLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (AESDECLASTrm VR128:$src1, addr:$src2)>; +let Predicates = [HasAES] in { + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (AESENCrr 
VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (AESENCrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (AESENCLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (AESENCLASTrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (AESDECrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (AESDECrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (AESDECLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (AESDECLASTrm VR128:$src1, addr:$src2)>; +} + +let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in { + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (VAESENCrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (VAESENCrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (VAESENCLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (VAESENCLASTrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (VAESDECrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (VAESDECrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (VAESDECLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (VAESDECLASTrm VR128:$src1, addr:$src2)>; +} // Perform the AES InvMixColumn Transformation let Predicates = [HasAVX, HasAES] in { @@ -5288,8 +6496,10 @@ defm : pclmul_alias<"lqlq", 0x00>; // AVX Instructions //===----------------------------------------------------------------------===// - -// Load from memory and broadcast to all elements of the destination operand +//===----------------------------------------------------------------------===// +// VBROADCAST - Load from memory and broadcast to all elements of the +// destination operand +// class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> : AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), @@ -5305,7 +6515,26 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -// Insert packed floating-point values +def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), + (VBROADCASTF128 addr:$src)>; + +def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSY addr:$src)>; +def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VBROADCASTSD addr:$src)>; +def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSY addr:$src)>; +def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSD addr:$src)>; + +def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSS addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSS addr:$src)>; + +//===----------------------------------------------------------------------===// +// VINSERTF128 - Insert packed floating-point values +// def VINSERTF128rr : AVXAIi8<0x18, 
MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5315,7 +6544,41 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; -// Extract packed floating-point values +def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + +//===----------------------------------------------------------------------===// +// VEXTRACTF128 - Extract packed floating-point values +// def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -5325,7 +6588,41 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -// Conditional SIMD Packed Loads and Stores +def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 
(VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +//===----------------------------------------------------------------------===// +// VMASKMOV - Conditional SIMD Packed Loads and Stores +// multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, Intrinsic IntSt, Intrinsic IntSt256, @@ -5363,7 +6660,9 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskstore_pd_256, memopv2f64, memopv4f64>; -// Permute Floating-Point Values +//===----------------------------------------------------------------------===// +// VPERMIL - Permute Single and Double Floating-Point Values +// multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, @@ -5404,6 +6703,18 @@ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, int_x86_avx_vpermilvar_pd_256, int_x86_avx_vpermil_pd_256>; +def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), + (VPERMILPSYri VR256:$src1, imm:$imm)>; +def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), + (VPERMILPDYri VR256:$src1, imm:$imm)>; +def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), + (VPERMILPSYri VR256:$src1, imm:$imm)>; +def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), + (VPERMILPDYri VR256:$src1, imm:$imm)>; + +//===----------------------------------------------------------------------===// +// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks +// def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5413,65 +6724,6 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; -// Zero All YMM registers -def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>; - -// Zero Upper bits of YMM registers -def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", - [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; - -def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; - -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - -def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_ps_256 
VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; - -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; - -def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), - (VBROADCASTF128 addr:$src)>; - def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), @@ -5489,377 +6741,59 @@ def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + //===----------------------------------------------------------------------===// -// SSE Shuffle pattern fragments -//===----------------------------------------------------------------------===// +// VZERO - Zero YMM registers +// +let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { + // Zero All YMM registers + def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", + [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; -// This is part of a "work in progress" refactoring. The idea is that all -// vector shuffles are going to be translated into target specific nodes and -// directly matched by the patterns below (which can be changed along the way) -// The AVX version of some but not all of them are described here, and more -// should come in a near future. - -// Shuffle with PSHUFD instruction folding loads. The first two patterns match -// SSE2 loads, which are always promoted to v2i64. The last one should match -// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD -// in SSE2, how does it ever worked? Anyway, the pattern will remain here until -// we investigate further. 
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; -def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked? - -// Shuffle with PSHUFD instruction. -def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - -def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - -// Shuffle with SHUFPD instruction. -def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - -def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - -// Shuffle with SHUFPS instruction. 
-def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - -def : Pat<(v4i32 (X86Shufps VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86Shufps VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - -// Shuffle with MOVHLPS instruction -def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with MOVDDUP instruction -def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (MOVDDUPrm addr:$src)>; - - -// Shuffle with UNPCKLPS -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; - -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), - (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), - (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKHPS -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKHPSrm VR128:$src1, addr:$src2)>; - -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), - (VUNPCKHPSrr 
VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), - (UNPCKHPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKLPD -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), - (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), - (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKHPD -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKHPDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), - (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), - (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLBW -def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PUNPCKLBWrm VR128:$src1, addr:$src2)>; -def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)), - (PUNPCKLBWrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLWD -def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PUNPCKLWDrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)), - (PUNPCKLWDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLDQ -def : Pat<(v4i32 (X86Punpckldq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PUNPCKLDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)), - (PUNPCKLDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLQDQ -def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))), - (PUNPCKLQDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)), - (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHBW -def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PUNPCKHBWrm VR128:$src1, addr:$src2)>; -def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)), - (PUNPCKHBWrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHWD -def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PUNPCKHWDrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)), - (PUNPCKHWDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHDQ -def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PUNPCKHDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)), - (PUNPCKHDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHQDQ -def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))), - (PUNPCKHQDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)), - (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with MOVLHPS -def : Pat<(X86Movlhps VR128:$src1, - (bc_v4f32 (v2f64 
(scalar_to_vector (loadf64 addr:$src2))))), - (MOVHPSrm VR128:$src1, addr:$src2)>; -def : Pat<(X86Movlhps VR128:$src1, - (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; - -// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. -def : Pat<(v2f64 (X86Movddup VR128:$src)), - (UNPCKLPDrr VR128:$src, VR128:$src)>; - -// Shuffle with MOVLHPD -def : Pat<(v2f64 (X86Movlhpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVHPDrm VR128:$src1, addr:$src2)>; - -// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVHPDrm VR128:$src1, addr:$src2)>; - -// Shuffle with MOVSS -def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), - (MOVSSrr VR128:$src1, FR32:$src2)>; -def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; -def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. 
-def : Pat<(X86Movss VR128:$src1, - (bc_v4i32 (v2i64 (load addr:$src2)))), - (MOVLPSrm VR128:$src1, addr:$src2)>; - -// Shuffle with MOVSD -def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), - (MOVSDrr VR128:$src1, FR64:$src2)>; -def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; -def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; -def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; -def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; - -// Shuffle with MOVSHDUP -def : Pat<(v4i32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; -def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))), - (MOVSHDUPrm addr:$src)>; - -def : Pat<(v4f32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; -def : Pat<(X86Movshdup (memopv4f32 addr:$src)), - (MOVSHDUPrm addr:$src)>; - -// Shuffle with MOVSLDUP -def : Pat<(v4i32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; -def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))), - (MOVSLDUPrm addr:$src)>; - -def : Pat<(v4f32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; -def : Pat<(X86Movsldup (memopv4f32 addr:$src)), - (MOVSLDUPrm addr:$src)>; - -// Shuffle with PSHUFHW -def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), - (PSHUFHWri VR128:$src, imm:$imm)>; -def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), - (PSHUFHWmi addr:$src, imm:$imm)>; - -// Shuffle with PSHUFLW -def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), - (PSHUFLWri VR128:$src, imm:$imm)>; -def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), - (PSHUFLWmi addr:$src, imm:$imm)>; - -// Shuffle with PALIGN -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + // Zero Upper bits of YMM registers + def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", + [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>; +} -// Shuffle with MOVLPS -def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(X86Movlps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. 
-def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; - -def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; - -// Shuffle with MOVLPD -def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v2f64 (X86Movlpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVLPDrm VR128:$src1, addr:$src2)>; - -// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD -def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst), - (MOVHPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), - (MOVHPDmr addr:$dst, VR128:$src)>; - -def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v4i32 (X86Movlps - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; - -def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; +//===----------------------------------------------------------------------===// +// Half precision conversion instructions +// +let Predicates = [HasAVX, HasF16C] in { + def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 31de878..05a5b36 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -67,43 +67,43 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, // let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), - "in{b}\t{%dx, %al|%AL, %DX}", []>; + "in{b}\t{%dx, %al|AL, DX}", []>; let Defs = [AX], Uses = [DX] in def IN16rr : I<0xED, RawFrm, (outs), (ins), - "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; + "in{w}\t{%dx, %ax|AX, DX}", []>, 
OpSize; let Defs = [EAX], Uses = [DX] in def IN32rr : I<0xED, RawFrm, (outs), (ins), - "in{l}\t{%dx, %eax|%EAX, %DX}", []>; + "in{l}\t{%dx, %eax|EAX, DX}", []>; let Defs = [AL] in def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), - "in{b}\t{$port, %al|%AL, $port}", []>; + "in{b}\t{$port, %al|AL, $port}", []>; let Defs = [AX] in def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; + "in{w}\t{$port, %ax|AX, $port}", []>, OpSize; let Defs = [EAX] in def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{l}\t{$port, %eax|%EAX, $port}", []>; + "in{l}\t{$port, %eax|EAX, $port}", []>; let Uses = [DX, AL] in def OUT8rr : I<0xEE, RawFrm, (outs), (ins), - "out{b}\t{%al, %dx|%DX, %AL}", []>; + "out{b}\t{%al, %dx|DX, AL}", []>; let Uses = [DX, AX] in def OUT16rr : I<0xEF, RawFrm, (outs), (ins), - "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; + "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize; let Uses = [DX, EAX] in def OUT32rr : I<0xEF, RawFrm, (outs), (ins), - "out{l}\t{%eax, %dx|%DX, %EAX}", []>; + "out{l}\t{%eax, %dx|DX, EAX}", []>; let Uses = [AL] in def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), - "out{b}\t{%al, $port|$port, %AL}", []>; + "out{b}\t{%al, $port|$port, AL}", []>; let Uses = [AX] in def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; + "out{w}\t{%ax, $port|$port, AX}", []>, OpSize; let Uses = [EAX] in def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{l}\t{%eax, $port|$port, %EAX}", []>; + "out{l}\t{%eax, $port|$port, EAX}", []>; def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize; @@ -229,65 +229,65 @@ def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t{$src}", []>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), - "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), - "push{l}\t%cs", []>, Requires<[In32BitMode]>; + "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>; def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), - "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), - "push{l}\t%ss", []>, Requires<[In32BitMode]>; + "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>; def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), - "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), - "push{l}\t%ds", []>, Requires<[In32BitMode]>; + "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>; def PUSHES16 : I<0x06, RawFrm, (outs), (ins), - "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize; def PUSHES32 : I<0x06, RawFrm, (outs), (ins), - "push{l}\t%es", []>, Requires<[In32BitMode]>; + "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>; def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), - "push{w}\t%fs", []>, OpSize, TB; + "push{w}\t{%fs|FS}", []>, OpSize, TB; def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), - "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>; + "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>; def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), - "push{w}\t%gs", []>, OpSize, TB; + "push{w}\t{%gs|GS}", []>, OpSize, TB; def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t%gs", []>, TB, 
Requires<[In32BitMode]>; + "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t%fs", []>, TB; + "push{q}\t{%fs|FS}", []>, TB; def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t%gs", []>, TB; + "push{q}\t{%gs|GS}", []>, TB; // No "pop cs" instruction. def POPSS16 : I<0x17, RawFrm, (outs), (ins), - "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>; def POPSS32 : I<0x17, RawFrm, (outs), (ins), - "pop{l}\t%ss", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>; def POPDS16 : I<0x1F, RawFrm, (outs), (ins), - "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>; def POPDS32 : I<0x1F, RawFrm, (outs), (ins), - "pop{l}\t%ds", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>; def POPES16 : I<0x07, RawFrm, (outs), (ins), - "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>; def POPES32 : I<0x07, RawFrm, (outs), (ins), - "pop{l}\t%es", []> , Requires<[In32BitMode]>; + "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>; def POPFS16 : I<0xa1, RawFrm, (outs), (ins), - "pop{w}\t%fs", []>, OpSize, TB; + "pop{w}\t{%fs|FS}", []>, OpSize, TB; def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>; def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t%fs", []>, TB; + "pop{q}\t{%fs|FS}", []>, TB; def POPGS16 : I<0xa9, RawFrm, (outs), (ins), - "pop{w}\t%gs", []>, OpSize, TB; + "pop{w}\t{%gs|GS}", []>, OpSize, TB; def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>; def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t%gs", []>, TB; + "pop{q}\t{%gs|GS}", []>, TB; def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), @@ -400,12 +400,29 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +//===----------------------------------------------------------------------===// +// XSAVE instructions let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [RDX, RAX, RCX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; +let Uses = [RDX, RAX] in { + def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), + "xsave\t$dst", []>, TB; + def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), + "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor\t$dst", []>, TB; + def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), + "xsaveopt\t$dst", []>, TB; + def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), + "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions let Defs = [RAX, RDI], Uses = [RDX, RDI] in @@ -427,3 +444,24 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : 
I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6; + +//===----------------------------------------------------------------------===// +// FS/GS Base Instructions +let Predicates = [In64BitMode] in { + def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), + "rdfsbase{l}\t$dst", []>, TB, XS; + def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins), + "rdfsbase{q}\t$dst", []>, TB, XS; + def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), + "rdgsbase{l}\t$dst", []>, TB, XS; + def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), + "rdgsbase{q}\t$dst", []>, TB, XS; + def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst), + "wrfsbase{l}\t$dst", []>, TB, XS; + def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst), + "wrfsbase{q}\t$dst", []>, TB, XS; + def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst), + "wrgsbase{l}\t$dst", []>, TB, XS; + def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst), + "wrgsbase{q}\t$dst", []>, TB, XS; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td index daf61e4..09a7a7d0c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrVMX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td @@ -16,9 +16,15 @@ // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; +def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), + "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8; +def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), + "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; +def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), + "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8; +def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), + "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index e385335..50bc14d 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -372,15 +372,10 @@ ReSimplify: case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; - case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; - case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; - case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; - case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break; case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; - case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; - case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; + case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 @@ -468,6 +463,18 @@ ReSimplify: case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break; case X86::JG_4: OutMI.setOpcode(X86::JG_1); break; + // Atomic load and store require a 
separate pseudo-inst because Acquire + // implies mayStore and Release implies mayLoad; fix these to regular MOV + // instructions here + case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify; + case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify; + case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify; + case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify; + case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify; + case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify; + case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify; + case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify; + // We don't currently select the correct instruction form for instructions // which have a short %eax, etc. form. Handle this by custom lowering, for // now. @@ -585,6 +592,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, } void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { + OutStreamer.EmitCodeRegion(); + X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: @@ -601,7 +610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); return; - + case X86::EH_RETURN: case X86::EH_RETURN64: { diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 06043ec..b0bb313 100644 --- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -53,10 +53,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; - /// ReserveFP - whether the function should reserve the frame pointer - /// when allocating, even if there may not actually be a frame pointer used. - bool ReserveFP; - /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; /// RegSaveFrameIndex - X86-64 vararg func register save area. @@ -65,6 +61,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsGPOffset; /// VarArgsFPOffset - X86-64 vararg func fp reg offset. unsigned VarArgsFPOffset; + /// ArgumentStackSize - The number of bytes on stack consumed by the arguments + /// being passed on the stack. 
+ unsigned ArgumentStackSize; public: X86MachineFunctionInfo() : ForceFramePointer(false), @@ -77,7 +76,8 @@ public: VarArgsFrameIndex(0), RegSaveFrameIndex(0), VarArgsGPOffset(0), - VarArgsFPOffset(0) {} + VarArgsFPOffset(0), + ArgumentStackSize(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -87,11 +87,11 @@ public: TailCallReturnAddrDelta(0), SRetReturnReg(0), GlobalBaseReg(0), - ReserveFP(false), VarArgsFrameIndex(0), RegSaveFrameIndex(0), VarArgsGPOffset(0), - VarArgsFPOffset(0) {} + VarArgsFPOffset(0), + ArgumentStackSize(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -114,9 +114,6 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } - bool getReserveFP() const { return ReserveFP; } - void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } - int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; } @@ -128,6 +125,9 @@ public: unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; } void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; } + + unsigned getArgumentStackSize() const { return ArgumentStackSize; } + void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index f2faf59..c1ac9f3 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -53,7 +52,13 @@ ForceStackAlign("force-align-stack", X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(), TM(tm), TII(tii) { + : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::RIP : X86::EIP, + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false), + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)), + TM(tm), TII(tii) { + X86_MC::InitLLVM2SEHRegisterMapping(this); + // Cache some information. const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); @@ -70,40 +75,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, } } -static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) { - if (!Subtarget->is64Bit()) { - if (Subtarget->isTargetDarwin()) { - if (isEH) - return DWARFFlavour::X86_32_DarwinEH; - else - return DWARFFlavour::X86_32_Generic; - } else if (Subtarget->isTargetCygMing()) { - // Unsupported by now, just quick fallback - return DWARFFlavour::X86_32_Generic; - } else { - return DWARFFlavour::X86_32_Generic; - } - } - return DWARFFlavour::X86_64; -} - -/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF -/// specific numbering, used in debug info and exception tables. 
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - unsigned Flavour = getFlavour(Subtarget, isEH); - - return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); -} - -/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register. -int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - unsigned Flavour = getFlavour(Subtarget, isEH); - - return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour); -} - /// getCompactUnwindRegNum - This function maps the register to the number for /// compact unwind encoding. Return -1 if the register isn't valid. int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { @@ -121,7 +92,7 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { int X86RegisterInfo::getSEHRegNum(unsigned i) const { - int reg = getX86RegNum(i); + int reg = X86_MC::getX86RegNum(i); switch (i) { case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: @@ -140,96 +111,16 @@ X86RegisterInfo::getSEHRegNum(unsigned i) const { return reg; } -/// getX86RegNum - This function maps LLVM register identifiers to their X86 -/// specific numbering, which is used in various places encoding instructions. -unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { - switch(RegNo) { - case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: - return N86::ESP; - case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: - return N86::EBP; - case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: - return N86::ESI; - case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: - return N86::EDI; - - case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: - return N86::EAX; - case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: - return N86::ECX; - case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: - return N86::EDX; - case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: - return N86::EBX; - case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: - return N86::ESP; - case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: - return N86::EBP; - case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: - return N86::ESI; - case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: - return N86::EDI; - - case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: - case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: - return RegNo-X86::ST0; - - case X86::XMM0: case X86::XMM8: - case X86::YMM0: case X86::YMM8: case X86::MM0: - return 0; - case X86::XMM1: case X86::XMM9: - case X86::YMM1: case X86::YMM9: case X86::MM1: - return 1; - case X86::XMM2: case X86::XMM10: - case X86::YMM2: case X86::YMM10: case X86::MM2: - return 2; - case X86::XMM3: case X86::XMM11: - case X86::YMM3: case X86::YMM11: case X86::MM3: - return 3; - case X86::XMM4: case X86::XMM12: - case X86::YMM4: case X86::YMM12: case X86::MM4: - return 4; - case X86::XMM5: case X86::XMM13: - case X86::YMM5: case 
X86::YMM13: case X86::MM5: - return 5; - case X86::XMM6: case X86::XMM14: - case X86::YMM6: case X86::YMM14: case X86::MM6: - return 6; - case X86::XMM7: case X86::XMM15: - case X86::YMM7: case X86::YMM15: case X86::MM7: - return 7; - - case X86::ES: return 0; - case X86::CS: return 1; - case X86::SS: return 2; - case X86::DS: return 3; - case X86::FS: return 4; - case X86::GS: return 5; - - case X86::CR0: case X86::CR8 : case X86::DR0: return 0; - case X86::CR1: case X86::CR9 : case X86::DR1: return 1; - case X86::CR2: case X86::CR10: case X86::DR2: return 2; - case X86::CR3: case X86::CR11: case X86::DR3: return 3; - case X86::CR4: case X86::CR12: case X86::DR4: return 4; - case X86::CR5: case X86::CR13: case X86::DR5: return 5; - case X86::CR6: case X86::CR14: case X86::DR6: return 6; - case X86::CR7: case X86::CR15: case X86::DR7: return 7; - - // Pseudo index registers are equivalent to a "none" - // scaled index (See Intel Manual 2A, table 2-3) - case X86::EIZ: - case X86::RIZ: - return 4; - - default: - assert(isVirtualRegister(RegNo) && "Unknown physical register!"); - llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); - return 0; - } +const TargetRegisterClass * +X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const { + // The sub_8bit sub-register index is more constrained in 32-bit mode. + // It behaves just like the sub_8bit_hi index. + if (!Is64Bit && Idx == X86::sub_8bit) + Idx = X86::sub_8bit_hi; + + // Forward to TableGen's default version. + return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); } const TargetRegisterClass * @@ -355,8 +246,19 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass* X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ + // Don't allow super-classes of GR8_NOREX. This class is only used after + // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied + // to the full GR8 register class in 64-bit mode, so we cannot allow the + // reigster class inflation. + // + // The GR8_NOREX class is always used in a way that won't be constrained to a + // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the + // full GR8 class. + if (RC == X86::GR8_NOREXRegisterClass) + return RC; + const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { switch (Super->getID()) { case X86::GR8RegClassID: @@ -741,11 +643,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned X86RegisterInfo::getRARegister() const { - return Is64Bit ? X86::RIP // Should have dwarf #16. - : X86::EIP; // Should have dwarf #8. -} - unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ? 
FramePtr : StackPtr; @@ -948,7 +845,7 @@ namespace { for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) { - FuncInfo->setReserveFP(true); + FuncInfo->setForceFramePointer(true); return true; } } diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index a12eb12..7d39c68 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -24,22 +24,6 @@ namespace llvm { class TargetInstrInfo; class X86TargetMachine; -/// N86 namespace - Native X86 register numbers -/// -namespace N86 { - enum { - EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 - }; -} - -/// DWARFFlavour - Flavour of dwarf regnumbers -/// -namespace DWARFFlavour { - enum { - X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2 - }; -} - class X86RegisterInfo : public X86GenRegisterInfo { public: X86TargetMachine &TM; @@ -73,11 +57,6 @@ public: /// register identifier. static unsigned getX86RegNum(unsigned RegNo); - /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum - /// (created by TableGen) for target dependencies. - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; @@ -95,6 +74,9 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + virtual const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const; + const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; @@ -136,7 +118,6 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } // FIXME: Move to FrameInfok diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td index 203722a..9a7db36 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td @@ -390,6 +390,13 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, (GR32_NOREX sub_32bit)]; } +// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit +// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs +// to clear upper 32-bits of RAX so is not a NOP. +def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> { + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; +} + // GR32_NOSP - GR32 registers except ESP. 
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -455,8 +462,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; } -def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256, - (sequence "YMM%u", 0, 15)> { +def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + 256, (sequence "YMM%u", 0, 15)> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; } diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 02754f9..6406bce 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -54,7 +54,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { EVT IntPtr = TLI.getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 5e6c659..7064dd0 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -16,9 +16,11 @@ #include "X86InstrInfo.h" #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -185,24 +187,53 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV); - if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); - if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); - if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); - if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); - if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); - if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); - if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); + if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } + if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); } + if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); } + if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); } + if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); } + if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} + if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} + if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} // FIXME: AVX codegen support is not ready. 
- //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX); + //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL); - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3); - HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT); - HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES); + if (IsIntel && ((ECX >> 1) & 0x1)) { + HasCLMUL = true; + ToggleFeature(X86::FeatureCLMUL); + } + if (IsIntel && ((ECX >> 12) & 0x1)) { + HasFMA3 = true; + ToggleFeature(X86::FeatureFMA3); + } + if (IsIntel && ((ECX >> 22) & 0x1)) { + HasMOVBE = true; + ToggleFeature(X86::FeatureMOVBE); + } + if (IsIntel && ((ECX >> 23) & 0x1)) { + HasPOPCNT = true; + ToggleFeature(X86::FeaturePOPCNT); + } + if (IsIntel && ((ECX >> 25) & 0x1)) { + HasAES = true; + ToggleFeature(X86::FeatureAES); + } + if (IsIntel && ((ECX >> 29) & 0x1)) { + HasF16C = true; + ToggleFeature(X86::FeatureF16C); + } + if (IsIntel && ((ECX >> 30) & 0x1)) { + HasRDRAND = true; + ToggleFeature(X86::FeatureRDRAND); + } + + if ((ECX >> 13) & 0x1) { + HasCmpxchg16b = true; + ToggleFeature(X86::FeatureCMPXCHG16B); + } if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. @@ -224,6 +255,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); } + if ((ECX >> 5) & 0x1) { + HasLZCNT = true; + ToggleFeature(X86::FeatureLZCNT); + } if (IsAMD && ((ECX >> 6) & 0x1)) { HasSSE4A = true; ToggleFeature(X86::FeatureSSE4A); @@ -251,14 +286,21 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasCLMUL(false) , HasFMA3(false) , HasFMA4(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasLZCNT(false) + , HasBMI(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) + , HasCmpxchg16b(false) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) , TargetTriple(TT) - , In64BitMode(is64Bit) { + , In64BitMode(is64Bit) + , InNaClMode(false) { // Determine default and user specified characteristics if (!FS.empty() || !CPU.empty()) { std::string CPUName = CPU; @@ -304,6 +346,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, if (In64BitMode) ToggleFeature(X86::Mode64Bit); + if (isTargetNaCl()) { + InNaClMode = true; + ToggleFeature(X86::ModeNaCl); + } + if (HasAVX) X86SSELevel = NoMMXSSE; @@ -313,6 +360,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); + if(EnableSegmentedStacks && !isTargetELF()) + report_fatal_error("Segmented stacks are only implemented on ELF."); + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 6d22027..3258d3d 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -90,6 +90,21 @@ protected: /// HasFMA4 - Target has 4-operand fused multiply-add bool HasFMA4; + /// HasMOVBE - True if the processor has the MOVBE instruction. 
+ bool HasMOVBE; + + /// HasRDRAND - True if the processor has the RDRAND instruction. + bool HasRDRAND; + + /// HasF16C - Processor has 16-bit floating point conversion instructions. + bool HasF16C; + + /// HasLZCNT - Processor has LZCNT instruction. + bool HasLZCNT; + + /// HasBMI - Processor has BMI1 instructions. + bool HasBMI; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -100,6 +115,10 @@ protected: /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; + /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction; + /// this is true for most x86-64 chips, but not the first AMD chips. + bool HasCmpxchg16b; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -115,6 +134,9 @@ private: /// In64BitMode - True if compiling for 64-bit, false for 32-bit. bool In64BitMode; + /// InNaClMode - True if compiling for Native Client target. + bool InNaClMode; + public: /// This constructor initializes the data members to match that @@ -165,9 +187,15 @@ public: bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } + bool hasMOVBE() const { return HasMOVBE; } + bool hasRDRAND() const { return HasRDRAND; } + bool hasF16C() const { return HasF16C; } + bool hasLZCNT() const { return HasLZCNT; } + bool hasBMI() const { return HasBMI; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } + bool hasCmpxchg16b() const { return HasCmpxchg16b; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -185,6 +213,11 @@ public: return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } + bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } + bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } @@ -199,7 +232,8 @@ public: } bool isTargetWin64() const { - return In64BitMode && (isTargetMingw() || isTargetWindows()); + // FIXME: x86_64-cygwin has not been released yet. 
+ return In64BitMode && (isTargetCygMing() || isTargetWindows()); } bool isTargetEnvMacho() const { diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 9cab0e0..15c6c4e 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -16,65 +16,32 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) - return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - - if (TheTriple.isOSWindows()) - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); - - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); -} - extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); - - // Register the code emitter. - TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createX86MCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createX86MCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(TheX86_32Target, - createX86_32AsmBackend); - TargetRegistry::RegisterAsmBackend(TheX86_64Target, - createX86_64AsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(TheX86_32Target, - createMCStreamer); - TargetRegistry::RegisterObjectStreamer(TheX86_64Target, - createMCStreamer); } -X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : X86TargetMachine(T, TT, CPU, FS, false), +X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? - "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" + "n8:16:32-S128" : (getSubtargetImpl()->isTargetCygMing() || getSubtargetImpl()->isTargetWindows()) ? 
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"), + "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" + "n8:16:32-S32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" + "n8:16:32-S128"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -82,11 +49,12 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, } -X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : X86TargetMachine(T, TT, CPU, FS, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), +X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, true), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -95,52 +63,14 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, /// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), +X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { - DefRelocModel = getRelocationModel(); - - // If no relocation model was picked, default as appropriate for the target. - if (getRelocationModel() == Reloc::Default) { - // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. - // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we - // use static relocation model by default. - if (Subtarget.isTargetDarwin()) { - if (Subtarget.is64Bit()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::DynamicNoPIC); - } else if (Subtarget.isTargetWin64()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::Static); - } - - assert(getRelocationModel() != Reloc::Default && - "Relocation mode not picked"); - - // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC - // is defined as a model for code which may be used in static or dynamic - // executables but not necessarily a shared library. On X86-32 we just - // compile in -static mode, in x86-64 we use PIC. - if (getRelocationModel() == Reloc::DynamicNoPIC) { - if (is64Bit) - setRelocationModel(Reloc::PIC_); - else if (!Subtarget.isTargetDarwin()) - setRelocationModel(Reloc::Static); - } - - // If we are on Darwin, disallow static relocation model in X86-64 mode, since - // the Mach-O file format doesn't support it. - if (getRelocationModel() == Reloc::Static && - Subtarget.isTargetDarwin() && - is64Bit) - setRelocationModel(Reloc::PIC_); - // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. @@ -161,16 +91,20 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.setPICStyle(PICStyles::GOT); } - // Finally, if we have "none" as our PIC style, force to static mode. 
- if (Subtarget.getPICStyle() == PICStyles::None) - setRelocationModel(Reloc::Static); - // default to hard float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Hard; } //===----------------------------------------------------------------------===// +// Command line options for x86 +//===----------------------------------------------------------------------===// +static cl::opt<bool> +UseVZeroUpper("x86-use-vzeroupper", + cl::desc("Minimize AVX to SSE transition penalty"), + cl::init(false)); + +//===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// @@ -200,46 +134,25 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { - PM.add(createSSEDomainFixPass()); - return true; + bool ShouldPrint = false; + if (OptLevel != CodeGenOpt::None && + (Subtarget.hasSSE2() || Subtarget.hasAVX())) { + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + ShouldPrint = true; } - return false; + + if (Subtarget.hasAVX() && UseVZeroUpper) { + PM.add(createX86IssueVZeroUpperPass()); + ShouldPrint = true; + } + + return ShouldPrint; } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // FIXME: Move this to TargetJITInfo! - // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { - setRelocationModel(Reloc::Static); - Subtarget.setPICStyle(PICStyles::None); - } - - PM.add(createX86JITCodeEmitterPass(*this, JCE)); return false; } - -void X86TargetMachine::setCodeModelForStatic() { - - if (getCodeModel() != CodeModel::Default) return; - - // For static codegen, if we're not already set, use Small codegen. - setCodeModel(CodeModel::Small); -} - - -void X86TargetMachine::setCodeModelForJIT() { - - if (getCodeModel() != CodeModel::Default) return; - - // 64-bit JIT places everything in the same buffer except external functions. - if (Subtarget.is64Bit()) - setCodeModel(CodeModel::Large); - else - setCodeModel(CodeModel::Small); -} diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h index 885334a..d1569aa 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h @@ -29,21 +29,17 @@ namespace llvm { class formatted_raw_ostream; +class StringRef; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; X86FrameLowering FrameLowering; X86ELFWriterInfo ELFWriterInfo; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. -private: - // We have specific defaults for X86. 
- virtual void setCodeModelForJIT(); - virtual void setCodeModelForStatic(); - public: - X86TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + X86TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { @@ -87,8 +83,9 @@ class X86_32TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; public: - X86_32TargetMachine(const Target &T, const std::string &M, - const std::string &CPU, const std::string &FS); + X86_32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -113,8 +110,9 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; public: - X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + X86_64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp index 1231798..991f322 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp @@ -43,79 +43,3 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return Mang->getSymbol(GV); } - -unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small || Model == CodeModel::Medium) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const { - if (CFI) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - - return DW_EH_PE_udata4; -} - -unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h index e21b5bf..d7adf27 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h +++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h @@ -33,28 +33,6 @@ namespace llvm { MachineModuleInfo *MMI) const; }; - class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - const X86TargetMachine &TM; - public: - X8632_ELFTargetObjectFile(const X86TargetMachine &tm) - :TM(tm) { } - virtual unsigned getPersonalityEncoding() const; - virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding(bool CFI) const; - virtual unsigned getTTypeEncoding() const; - }; - - class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - const X86TargetMachine &TM; - public: - X8664_ELFTargetObjectFile(const X86TargetMachine &tm) - :TM(tm) { } - virtual unsigned getPersonalityEncoding() const; - virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding(bool CFI) const; - virtual unsigned getTTypeEncoding() const; - }; - } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp new file mode 100644 index 0000000..3958494 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp @@ -0,0 +1,105 @@ +//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which inserts x86 AVX vzeroupper instructions +// before calls to SSE encoded functions. This avoids transition latency +// penalty when tranfering control between AVX encoded instructions and old +// SSE encoding mode. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-codegen" +#include "X86.h" +#include "X86InstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); + +namespace { + struct VZeroUpperInserter : public MachineFunctionPass { + static char ID; + VZeroUpperInserter() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + virtual const char *getPassName() const { return "X86 vzeroupper inserter";} + + private: + const TargetInstrInfo *TII; // Machine instruction info. + MachineBasicBlock *MBB; // Current basic block + }; + char VZeroUpperInserter::ID = 0; +} + +FunctionPass *llvm::createX86IssueVZeroUpperPass() { + return new VZeroUpperInserter(); +} + +/// runOnMachineFunction - Loop over all of the basic blocks, inserting +/// vzero upper instructions before function calls. +bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + bool Changed = false; + + // Process any unreachable blocks in arbitrary order now. + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + Changed |= processBasicBlock(MF, *BB); + + return Changed; +} + +static bool isCallToModuleFn(const MachineInstr *MI) { + assert(MI->getDesc().isCall() && "Isn't a call instruction"); + + for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (!MO.isGlobal()) + continue; + + const GlobalValue *GV = MO.getGlobal(); + GlobalValue::LinkageTypes LT = GV->getLinkage(); + if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) || + (GV->isExternalLinkage(LT) && !GV->isDeclaration())) + return true; + + return false; + } + return false; +} + +/// processBasicBlock - Loop over all of the instructions in the basic block, +/// inserting vzero upper instructions before function calls. +bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, + MachineBasicBlock &BB) { + bool Changed = false; + MBB = &BB; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineInstr *MI = I; + DebugLoc dl = I->getDebugLoc(); + + // Insert a vzeroupper instruction before each control transfer + // to functions outside this module + if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) { + BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + ++NumVZU; + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index c3b3dc9..0000000 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMXCoreDesc - XCoreMCTargetDesc.cpp - XCoreMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile deleted file mode 100644 index de61543..0000000 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMXCoreDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 939d97c..276e841 100644 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "XCoreMCTargetDesc.h" #include "XCoreMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "XCoreGenInstrInfo.inc" @@ -35,8 +36,10 @@ static MCInstrInfo *createXCoreMCInstrInfo() { return X; } -extern "C" void LLVMInitializeXCoreMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo); +static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitXCoreMCRegisterInfo(X, XCore::LR); + return X; } static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,11 +49,40 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeXCoreMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, - createXCoreMCSubtargetInfo); +static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT); + + // Initial state of the frame pointer is SP. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(XCore::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeXCoreMCAsmInfo() { - RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget); +// Force static initialization. +extern "C" void LLVMInitializeXCoreTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget, + createXCoreMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, + createXCoreMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp index 7aa8965..9a0971d 100644 --- a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp @@ -9,7 +9,7 @@ #include "XCore.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheXCoreTarget; diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 1a43714..8906b24 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -32,11 +33,11 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cctype> @@ -51,6 +52,7 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); public: explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){} @@ -79,6 +81,7 @@ namespace { void EmitFunctionEntryLabel(); void EmitInstruction(const MachineInstr *MI); void EmitFunctionBodyEnd(); + virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -88,7 +91,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { assert(((GV->hasExternalLinkage() || GV->hasWeakLinkage()) || GV->hasLinkOnceLinkage()) && "Unexpected linkage"); - if (const ArrayType *ATy = dyn_cast<ArrayType>( + if (ArrayType *ATy = dyn_cast<ArrayType>( cast<PointerType>(GV->getType())->getElementType())) { OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); // FIXME: MCStreamerize. @@ -261,16 +264,57 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } +void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps == 4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); +} + +MachineLocation XCoreAsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue. + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Unexpected MachineOperand types"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); - // Check for mov mnemonic - if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) - O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " - << getRegisterName(MI->getOperand(1).getReg()); - else - printInstruction(MI, O); + switch (MI->getOpcode()) { + case XCore::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + case XCore::ADD_2rus: + if (MI->getOperand(2).getImm() == 0) { + O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " + << getRegisterName(MI->getOperand(1).getReg()); + OutStreamer.EmitRawText(O.str()); + return; + } + break; + } + printInstruction(MI, O); OutStreamer.EmitRawText(O.str()); } diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 0578220..7f8b169 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -100,7 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool FP = hasFP(MF); - bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest); + bool Nested = MF.getFunction()-> + getAttributes().hasAttrSomewhere(Attribute::Nest); if (Nested) { loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); @@ -270,14 +271,6 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, } } -void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is SP. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h index 7da19f0..c591e93 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h @@ -42,8 +42,6 @@ namespace llvm { bool hasFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index a8dd847..4dac1ce 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -169,9 +169,14 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), TLI.getPointerTy()); - return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, - MVT::Other, CPIdx, - CurDAG->getEntryNode()); + SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, + MVT::Other, CPIdx, + CurDAG->getEntryNode()); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4); + cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1); + return node; } break; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 6d040e0..2afe0e3 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -81,6 +81,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Use i32 for setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // XCore does not have the NodeTypes below. setOperationAction(ISD::BR_CC, MVT::Other, Expand); @@ -147,7 +148,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // TRAMPOLINE is custom lowered. 
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; maxStoresPerMemmove = maxStoresPerMemmoveOptSize @@ -180,7 +182,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -252,8 +255,8 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); } -static inline bool isZeroLengthArray(const Type *Ty) { - const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); +static inline bool isZeroLengthArray(Type *Ty) { + ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); return AT && (AT->getNumElements() == 0); } @@ -275,7 +278,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const llvm_unreachable("Thread local object not a GlobalVariable?"); return SDValue(); } - const Type *Ty = cast<PointerType>(GV->getType())->getElementType(); + Type *Ty = cast<PointerType>(GV->getType())->getElementType(); if (!Ty->isSized() || isZeroLengthArray(Ty)) { #ifndef NDEBUG errs() << "Size of thread local object " << GVar->getName() @@ -465,7 +468,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -524,7 +527,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -789,7 +792,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, } SDValue XCoreTargetLowering:: -LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { +LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue XCoreTargetLowering:: +LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -841,9 +849,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } //===----------------------------------------------------------------------===// @@ -1148,10 +1154,10 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, int offset = 0; // Save remaining registers, storing higher register numbers at a higher // address - for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { + for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) { // Create a stack slot int FI = MFI->CreateFixedObject(4, offset, true); - if (i == FirstVAReg) { + if (i == (int)FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } offset -= StackSlotSize; @@ -1409,7 +1415,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // operands are constant canonicalize smallest to RHS. if ((N0C && !N1C) || (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue())) - return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3); + return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), + N1, N0, N2, N3); // lmul(x, 0, a, b) if (N1C && N1C->isNullValue()) { @@ -1548,7 +1555,7 @@ static inline bool isImmUs4(int64_t val) /// by AM is legal for this target, for a load/store of the specified type. 
bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 9c803be..d6c5b32 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -101,7 +101,7 @@ namespace llvm { MachineBasicBlock *MBB) const; virtual bool isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const; + Type *Ty) const; private: const XCoreTargetMachine &TM; @@ -145,7 +145,8 @@ namespace llvm { SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair<unsigned, const TargetRegisterClass*> diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index f90481f..a0946a1 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -17,11 +17,10 @@ #include "llvm/MC/MCContext.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "XCoreGenInstrInfo.inc" @@ -387,6 +386,15 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0); } +MachineInstr* +XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool XCoreInstrInfo:: diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h index 840b1e1..d354802 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -78,6 +78,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; virtual bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index 55c7527..4d2e93b 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -572,7 +572,7 @@ def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr), [(store GRRegs:$val, ADDRdpii:$addr)]>; //let Uses = [CP] in .. 
-let mayLoad = 1, isReMaterializable = 1 in +let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in defm LDWCP : FRU6_LRU6_cp<"ldw">; let Uses = [SP] in { @@ -739,7 +739,7 @@ def LDAP_lu10_ba : _FLU10<(outs), let isCall=1, // All calls clobber the link register and the non-callee-saved registers: -Defs = [R0, R1, R2, R3, R11, LR] in { +Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { def BL_u10 : _FU10< (outs), (ins calltarget:$target, variable_ops), @@ -754,7 +754,7 @@ def BL_lu10 : _FLU10< } // Two operand short -// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg) +// TODO eet, eef, tsetmr def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -764,15 +764,25 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), [(set GRRegs:$dst, (ineg GRRegs:$b))]>; let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "sext $dst, $src2", - []>; + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, + immBitp:$src2))]>; + +def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, + GRRegs:$src2))]>; -let neverHasSideEffects = 1 in def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "zext $dst, $src2", - []>; + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, + immBitp:$src2))]>; + +def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, + GRRegs:$src2))]>; def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), "andnot $dst, $src2", @@ -819,7 +829,8 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), let Constraints = "$src = $dst" in def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), "outshr res[$r], $src", - [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, + GRRegs:$src))]>; def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", @@ -836,7 +847,8 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), let Constraints = "$src = $dst" in def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), "inshr $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, + GRRegs:$src))]>; def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", @@ -846,6 +858,14 @@ def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, immUs:$val)]>; +def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "testct $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>; + +def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "testwct $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>; + def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; @@ -871,7 +891,6 @@ def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>; // Two operand long -// TODO endin, peek, // getd, testlcl def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -917,6 +936,14 @@ def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), "setpsc res[$src1], 
$src2", [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; +def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "peek $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>; + +def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "endin $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; + // One operand short // TODO edu, eeu, waitet, waitef, tstart, clrtp // setdp, setcp, setev, kcall @@ -960,7 +987,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), let isCall=1, // All calls clobber the link register and the non-callee-saved registers: -Defs = [R0, R1, R2, R3, R11, LR] in { +Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), "bla $addr", [(XCoreBranchLink GRRegs:$addr)]>; @@ -974,10 +1001,15 @@ def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; -let Uses=[R11] in +let Uses=[R11] in { def SETV_1r : _F1R<(outs), (ins GRRegs:$r), - "setv res[$r], r11", - [(int_xcore_setv GRRegs:$r, R11)]>; + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def SETEV_1r : _F1R<(outs), (ins GRRegs:$r), + "setev res[$r], r11", + [(int_xcore_setev GRRegs:$r, R11)]>; +} def EEU_1r : _F1R<(outs), (ins GRRegs:$r), "eeu res[$r]", @@ -985,15 +1017,24 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r), // Zero operand short // TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, -// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, +// stet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; -let Defs = [R11] in +let Defs = [R11] in { def GETID_0R : _F0R<(outs), (ins), - "get r11, id", - [(set R11, (int_xcore_getid))]>; + "get r11, id", + [(set R11, (int_xcore_getid))]>; + +def GETED_0R : _F0R<(outs), (ins), + "get r11, ed", + [(set R11, (int_xcore_geted))]>; + +def GETET_0R : _F0R<(outs), (ins), + "get r11, et", + [(set R11, (int_xcore_getet))]>; +} def SSYNC_0r : _F0R<(outs), (ins), "ssync", diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp index 357a4a0..1b78b37 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -39,7 +38,7 @@ using namespace llvm; XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii) - : XCoreGenRegisterInfo(), TII(tii) { + : XCoreGenRegisterInfo(XCore::LR), TII(tii) { } // helper functions @@ -321,20 +320,8 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); } -int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} - unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ? 
XCore::R10 : XCore::SP; } - -unsigned XCoreRegisterInfo::getRARegister() const { - return XCore::LR; -} diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h index 801d9eb..5c28f39 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h @@ -60,7 +60,6 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; //! Return the array of argument passing registers @@ -74,10 +73,6 @@ public: //! Return whether to emit frame moves static bool needsFrameMoves(const MachineFunction &MF); - - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp index ad069bf..b4e9927 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp @@ -13,7 +13,7 @@ #include "XCoreSubtarget.h" #include "XCore.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 342966a..fdc5d35 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -14,15 +14,15 @@ #include "XCore.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; /// XCoreTargetMachine ctor - Create an ILP32 architecture model /// -XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h index 6235ac3..83d09d6d 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -32,8 +32,9 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; public: - XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + XCoreTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index fa007cf..e160f63 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -155,12 +155,12 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { for (unsigned 
i = 0; i != PointerArgs.size(); ++i) { bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); Argument *PtrArg = PointerArgs[i].first; - const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); + Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. if (isByVal) { - if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" @@ -190,7 +190,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // If the argument is a recursive type and we're in a recursive // function, we could end up infinitely peeling the function argument. if (isSelfRecursive) { - if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { if (STy->getElementType(i) == PtrArg->getType()) { @@ -382,7 +382,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { User *U = *UI; Operands.clear(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (LI->isVolatile()) return false; // Don't hack volatile loads + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); @@ -410,7 +411,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { - if (LI->isVolatile()) return false; // Don't hack volatile loads + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; Loads.push_back(LI); } else { // Other uses than load? @@ -492,7 +494,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; @@ -527,8 +529,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. 
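// [Editorial aside, illustrative only, not part of the patch] The isSimple()
// predicate introduced in the guards above means the access is neither
// volatile nor atomic, so the new check can be read roughly as:
//
//   if (LI->isVolatile() || LI->isAtomic())   // i.e. !LI->isSimple()
//     return false;   // promotion may reorder or duplicate the load
//
// The practical effect of the change is that atomic (but non-volatile) loads,
// which the old isVolatile() test let through, are now rejected as well.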
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - const StructType *STy = cast<StructType>(AgTy); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; @@ -576,9 +578,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), - SI->begin(), - SI->end())); + Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI)); assert(Params.back()); } @@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); - const Type *RetTy = FTy->getReturnType(); + Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. @@ -662,13 +662,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. - const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - const StructType *STy = cast<StructType>(AgTy); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, + Value *Idx = GetElementPtrInst::Create(*AI, Idxs, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? @@ -686,12 +686,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); - const Type *ElTy = V->getType(); + Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (ElTy->isStructTy() ? + Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); @@ -699,8 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. - V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), - V->getName()+".idx", Call); + V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } @@ -792,16 +791,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. 
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); - const StructType *STy = cast<StructType>(AgTy); + StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = - GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + GetElementPtrInst::Create(TheAlloca, Idxs, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index a21efce..c3ecb7a 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -23,7 +23,9 @@ #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -37,10 +39,18 @@ namespace { initializeConstantMergePass(*PassRegistry::getPassRegistry()); } - // run - For this pass, process all of the globals in the module, - // eliminating duplicate constants. - // + // For this pass, process all of the globals in the module, eliminating + // duplicate constants. bool runOnModule(Module &M); + + // Return true iff we can determine the alignment of this global variable. + bool hasKnownAlignment(GlobalVariable *GV) const; + + // Return the alignment of the global, including converting the default + // alignment to a concrete value. + unsigned getAlignment(GlobalVariable *GV) const; + + const TargetData *TD; }; } @@ -77,15 +87,28 @@ static bool IsBetterCannonical(const GlobalVariable &A, return A.hasUnnamedAddr(); } +bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { + return TD || GV->getAlignment() != 0; +} + +unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { + if (TD) + return TD->getPreferredAlignment(GV); + return GV->getAlignment(); +} + bool ConstantMerge::runOnModule(Module &M) { + TD = getAnalysisIfAvailable<TargetData>(); + // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); - // Map unique constant/section pairs to globals. We don't want to merge - // globals in different sections. - DenseMap<Constant*, GlobalVariable*> CMap; + // Map unique <constants, has-unknown-alignment> pairs to globals. We don't + // want to merge globals of unknown alignment with those of explicit + // alignment. If we have TargetData, we always know the alignment. + DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; @@ -120,7 +143,8 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. 
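// [Editorial aside, illustrative only, not part of the patch] The new CMap key
// packs an "alignment is known" bit into the low bit of the Constant* via
// PointerIntPair, so initializers whose required alignment cannot be
// determined never merge with explicitly aligned ones. A minimal sketch of the
// lookup as used in this hunk:
//
//   typedef PointerIntPair<Constant*, 1, bool> MergeKey;
//   DenseMap<MergeKey, GlobalVariable*> CMap;
//   MergeKey Key(Init, /*hasKnownAlignment=*/TD || GV->getAlignment() != 0);
//   GlobalVariable *&Slot = CMap[Key];
//
// When two globals do merge, the survivor's alignment is bumped to the larger
// of the two (see the std::max call added further down in this file's diff).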
- GlobalVariable *&Slot = CMap[Init]; + PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); + GlobalVariable *&Slot = CMap[Pair]; // If this is the first constant we find or if the old on is local, // replace with the current one. It the current is externally visible @@ -152,7 +176,8 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - GlobalVariable *Slot = CMap[Init]; + PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); + GlobalVariable *Slot = CMap[Pair]; if (!Slot || Slot == GV) continue; @@ -175,6 +200,14 @@ bool ConstantMerge::runOnModule(Module &M) { // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { + // Bump the alignment if necessary. + if (Replacements[i].first->getAlignment() || + Replacements[i].second->getAlignment()) { + Replacements[i].second->setAlignment(std::max( + Replacements[i].first->getAlignment(), + Replacements[i].second->getAlignment())); + } + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 1517765..4bb6f7a 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -206,7 +206,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. - const FunctionType *FTy = Fn.getFunctionType(); + FunctionType *FTy = Fn.getFunctionType(); std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), @@ -344,7 +344,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) static unsigned NumRetVals(const Function *F) { if (F->getReturnType()->isVoidTy()) return 0; - else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) + else if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) return STy->getNumElements(); else return 1; @@ -491,7 +491,7 @@ void DAE::SurveyFunction(const Function &F) { // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; - const Type *STy = dyn_cast<StructType>(F.getReturnType()); + Type *STy = dyn_cast<StructType>(F.getReturnType()); // Loop all uses of the function. for (Value::const_use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) { @@ -646,7 +646,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; // Set up to build a new list of parameter attributes. @@ -660,7 +660,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. 
Type *RetTy = FTy->getReturnType(); - const Type *NRetTy = NULL; + Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index @@ -669,7 +669,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (RetTy->isVoidTy()) { NRetTy = RetTy; } else { - const StructType *STy = dyn_cast<StructType>(RetTy); + StructType *STy = dyn_cast<StructType>(RetTy); if (STy) // Look at each of the original return values individually. for (unsigned i = 0; i != RetCount; ++i) { diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 95decec..0edf342 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -163,14 +163,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { ReadsMemory = true; continue; } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore non-volatile loads from local memory. + // Ignore non-volatile loads from local memory. (Atomic is okay here.) if (!LI->isVolatile()) { AliasAnalysis::Location Loc = AA->getLocation(LI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Ignore non-volatile stores to local memory. + // Ignore non-volatile stores to local memory. (Atomic is okay here.) if (!SI->isVolatile()) { AliasAnalysis::Location Loc = AA->getLocation(SI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 4ac721d..3552d03 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -195,12 +195,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, } if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { GS.isLoaded = true; - if (LI->isVolatile()) return true; // Don't hack on volatile loads. + // Don't hack on volatile/atomic loads. + if (!LI->isSimple()) return true; } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { // Don't allow a store OF the address, only stores TO the address. if (SI->getOperand(0) == V) return true; - if (SI->isVolatile()) return true; // Don't hack on volatile stores. + // Don't hack on volatile/atomic stores. 
+ if (!SI->isSimple()) return true; // If this is a direct store to the global (i.e., the global is a scalar // value, not an aggregate), keep more specific information about @@ -281,18 +283,18 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); } else if (isa<ConstantAggregateZero>(Agg)) { - if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (StructType *STy = dyn_cast<StructType>(Agg->getType())) { if (IdxV < STy->getNumElements()) return Constant::getNullValue(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast<SequentialType>(Agg->getType())) { return Constant::getNullValue(STy->getElementType()); } } else if (isa<UndefValue>(Agg)) { - if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (StructType *STy = dyn_cast<StructType>(Agg->getType())) { if (IdxV < STy->getNumElements()) return UndefValue::get(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast<SequentialType>(Agg->getType())) { return UndefValue::get(STy->getElementType()); } @@ -430,7 +432,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { ++GEPI; // Skip over the pointer index. // If this is a use of an array allocation, do a bit more checking for sanity. - if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { + if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); @@ -451,9 +453,9 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { GEPI != E; ++GEPI) { uint64_t NumElements; - if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) + if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { assert((*GEPI)->isStructTy() && @@ -498,7 +500,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); - const Type *Ty = Init->getType(); + Type *Ty = Init->getType(); std::vector<GlobalVariable*> NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); @@ -508,7 +510,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -531,9 +533,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) NGV->setAlignment(NewAlign); } - } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { unsigned NumElements = 0; - if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(STy)) NumElements = ATy->getNumElements(); else NumElements = 
cast<VectorType>(STy)->getNumElements(); @@ -596,15 +598,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), - &Idxs[0], Idxs.size()); + NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); } else { GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); SmallVector<Value*, 8> Idxs; Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), + NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, GEPI->getName()+"."+Twine(Val),GEPI); } } @@ -753,8 +754,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -846,12 +846,12 @@ static void ConstantPropUsersOf(Value *V) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, ConstantInt *NElements, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - const Type *GlobalType; + Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; else @@ -1192,7 +1192,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - const StructType *ST = + StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); PHINode *NewPN = @@ -1245,8 +1245,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - Value *NGEPI = GetElementPtrInst::Create(NewPtr, - GEPIdx.begin(), GEPIdx.end(), + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1260,11 +1259,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // already been seen first by another load, so its uses have already been // processed. PHINode *PN = cast<PHINode>(LoadUser); - bool Inserted; - DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; - tie(InsertPos, Inserted) = - InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); - if (!Inserted) return; + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector<Value*>())).second) + return; // If this is the first time we've seen this PHI, recursively process all // users. @@ -1298,8 +1295,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value* NElems, TargetData *TD) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - const Type* MAT = getMallocAllocatedType(CI); - const StructType *STy = cast<StructType>(MAT); + Type* MAT = getMallocAllocatedType(CI); + StructType *STy = cast<StructType>(MAT); // There is guaranteed to be at least one use of the malloc (storing // it into GV). 
If there are other uses, change them to be uses of @@ -1313,8 +1310,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, std::vector<Value*> FieldMallocs; for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::getUnqual(FieldTy); GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), @@ -1325,9 +1322,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldGlobals.push_back(NGV); unsigned TypeSize = TD->getTypeAllocSize(FieldTy); - if (const StructType *ST = dyn_cast<StructType>(FieldTy)) + if (StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, 0, @@ -1379,8 +1376,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); + Constant::getNullValue(GVVal->getType())); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", OrigBB->getParent()); BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", @@ -1428,7 +1424,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); Constant *Null = Constant::getNullValue(PT->getElementType()); new StoreInst(Null, FieldGlobals[i], SI); } @@ -1485,7 +1481,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { if (!TD) @@ -1538,10 +1534,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. malloc [100 x struct],1 -> malloc struct, 100 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) AllocTy = AT->getElementType(); - const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); + StructType *AllocSTy = dyn_cast<StructType>(AllocTy); if (!AllocSTy) return false; @@ -1552,8 +1548,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. 
malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); @@ -1596,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - const Type* MallocType = getMallocAllocatedType(CI); + Type* MallocType = getMallocAllocatedType(CI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, GVI, TD)) return true; @@ -1611,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { - const Type *GVElType = GV->getType()->getElementType(); + Type *GVElType = GV->getType()->getElementType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select @@ -1761,7 +1757,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); + Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); if (!isa<UndefValue>(GV->getInitializer())) @@ -2003,7 +1999,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); CSVals[1] = 0; - const StructType *StructTy = + StructType *StructTy = cast <StructType>( cast<ArrayType>(GCL->getType()->getElementType())->getElementType()); @@ -2013,9 +2009,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), false); - const PointerType *PFTy = PointerType::getUnqual(FTy); + PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 0x7fffffff); @@ -2196,7 +2192,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, } std::vector<Constant*> Elts; - if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { // Break up the constant into its elements. 
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { @@ -2224,10 +2220,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, } ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - const SequentialType *InitTy = cast<SequentialType>(Init->getType()); + SequentialType *InitTy = cast<SequentialType>(Init->getType()); uint64_t NumElts; - if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) NumElts = ATy->getNumElements(); else NumElts = cast<VectorType>(InitTy)->getNumElements(); @@ -2338,7 +2334,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, Constant *InstResult = 0; if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (SI->isVolatile()) return false; // no volatile accesses. + if (!SI->isSimple()) return false; // no volatile/atomic accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. @@ -2358,7 +2354,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, // stored value. Ptr = CE->getOperand(0); - const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType(); + Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType(); // In order to push the bitcast onto the stored value, a bitcast // from NewTy to Val's type must be legal. If it's not, we can try @@ -2367,14 +2363,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, // If NewTy is a struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. - if (const StructType *STy = dyn_cast<StructType>(NewTy)) { + if (StructType *STy = dyn_cast<StructType>(NewTy)) { NewTy = STy->getTypeAtIndex(0U); - const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); + IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; - Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2); + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); // If we can't improve the situation by introspecting NewTy, // we have to give up. @@ -2411,17 +2407,17 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(Values, *i)); - InstResult = cast<GEPOperator>(GEP)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : - ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + InstResult = + ConstantExpr::getGetElementPtr(P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (LI->isVolatile()) return false; // no volatile accesses. + if (!LI->isSimple()) return false; // no volatile/atomic accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), MutatedMemory); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 
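// [Editorial aside, illustrative only, not part of the patch] Several call
// sites in this hunk, and throughout the patch, move from the old
// begin/end-iterator overloads to ArrayRef-based ones, with the inbounds
// variant folded into a boolean parameter. A sketch of the before/after shape
// (Ptr, P, Idxs, Ops and InsertPt are placeholders):
//
//   // before
//   V = GetElementPtrInst::Create(Ptr, Idxs.begin(), Idxs.end(), "idx", InsertPt);
//   C = ConstantExpr::getInBoundsGetElementPtr(P, &Ops[0], Ops.size());
//   // after
//   V = GetElementPtrInst::Create(Ptr, Idxs, "idx", InsertPt);
//   C = ConstantExpr::getGetElementPtr(P, Ops, /*InBounds=*/true);
//
// Containers such as SmallVector convert to ArrayRef implicitly, so most
// callers simply drop the iterator pair or the pointer/size pair.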
- const Type *Ty = AI->getType()->getElementType(); + Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), @@ -2465,8 +2461,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals.data(), - Formals.size())) { + if (Constant *C = ConstantFoldCall(Callee, Formals)) { InstResult = C; } else { return false; @@ -2512,7 +2507,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, CallStack.pop_back(); // return from fn. return true; // We succeeded at evaluating this ctor! } else { - // invoke, unwind, unreachable. + // invoke, unwind, resume, unreachable. return false; // Cannot handle this terminator. } @@ -2711,7 +2706,7 @@ static Function *FindCXAAtExit(Module &M) { if (!Fn) return 0; - const FunctionType *FTy = Fn->getFunctionType(); + FunctionType *FTy = Fn->getFunctionType(); // Checking that the function has the right return type, the right number of // parameters and that they all have pointer types should be enough. diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index 25c0134..d757e1f 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -167,7 +167,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { // Check to see if this function returns a constant. SmallVector<Value *,4> RetVals; - const StructType *STy = dyn_cast<StructType>(F.getReturnType()); + StructType *STy = dyn_cast<StructType>(F.getReturnType()); if (STy) for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) RetVals.push_back(UndefValue::get(STy->getElementType(i))); diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 31ce95f..6233922 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm-c/Initialization.h" #include "llvm-c/Transforms/IPO.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" @@ -35,7 +36,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); initializeSingleLoopExtractorPass(Registry); - initializeLowerSetJmpPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); initializePruneEHPass(Registry); @@ -70,6 +70,10 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionInliningPass()); } +void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(llvm::createAlwaysInlinerPass()); +} + void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createGlobalDCEPass()); } @@ -82,10 +86,6 @@ void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIPConstantPropagationPass()); } -void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLowerSetJmpPass()); -} - void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } @@ -98,11 +98,6 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { unwrap(PM)->add(createInternalizePass(AllButMain != 0)); } - -void 
LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) { - // FIXME: Remove in LLVM 3.0. -} - void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createStripDeadPrototypesPass()); } diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp index ce795b7..c0426da 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -32,10 +33,10 @@ namespace { // AlwaysInliner only inlines functions that are mark as "always inline". class AlwaysInliner : public Inliner { // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; + SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - // Use extremely low threshold. + // Use extremely low threshold. AlwaysInliner() : Inliner(ID, -2000000000) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -52,8 +53,8 @@ namespace { void growCachedCostInfo(Function* Caller, Function* Callee) { CA.growCachedCostInfo(Caller, Callee); } - virtual bool doFinalization(CallGraph &CG) { - return removeDeadFunctions(CG, &NeverInline); + virtual bool doFinalization(CallGraph &CG) { + return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); void releaseMemory() { @@ -71,11 +72,13 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline", Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } -// doInitialization - Initializes the vector of functions that have not +// doInitialization - Initializes the vector of functions that have not // been annotated with the "always inline" attribute. bool AlwaysInliner::doInitialization(CallGraph &CG) { + CA.setTargetData(getAnalysisIfAvailable<TargetData>()); + Module &M = CG.getModule(); - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline)) diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp index 0c5b3be..84dd4fd 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -30,7 +31,7 @@ namespace { class SimpleInliner : public Inliner { // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; + SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: SimpleInliner() : Inliner(ID) { @@ -68,16 +69,17 @@ INITIALIZE_PASS_END(SimpleInliner, "inline", Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } -Pass *llvm::createFunctionInliningPass(int Threshold) { +Pass *llvm::createFunctionInliningPass(int Threshold) { return new SimpleInliner(Threshold); } // doInitialization - Initializes the vector of functions that have been // annotated with the noinline attribute. 
bool SimpleInliner::doInitialization(CallGraph &CG) { - + CA.setTargetData(getAnalysisIfAvailable<TargetData>()); + Module &M = CG.getModule(); - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline)) @@ -85,34 +87,34 @@ bool SimpleInliner::doInitialization(CallGraph &CG) { // Get llvm.noinline GlobalVariable *GV = M.getNamedGlobal("llvm.noinline"); - + if (GV == 0) return false; // Don't crash on invalid code if (!GV->hasDefinitiveInitializer()) return false; - + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - + if (InitList == 0) return false; // Iterate over each element and add to the NeverInline set for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - + // Get Source const Constant *Elt = InitList->getOperand(i); - + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) - if (CE->getOpcode() == Instruction::BitCast) + if (CE->getOpcode() == Instruction::BitCast) Elt = CE->getOperand(0); - + // Insert into set of functions to never inline if (const Function *F = dyn_cast<Function>(Elt)) NeverInline.insert(F); } - + return false; } diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index 57f3e77..f00935b 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -62,7 +62,7 @@ void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { } -typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > +typedef DenseMap<ArrayType*, std::vector<AllocaInst*> > InlinedArrayAllocasTy; /// InlineCallIfPossible - If it is possible to inline the specified call site, @@ -139,7 +139,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). - const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); + ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); if (ATy == 0 || AI->isArrayAllocation()) continue; diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 848944d..4f96afe4 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/FunctionUtils.h" #include "llvm/ADT/Statistic.h" #include <fstream> @@ -53,12 +54,12 @@ namespace { char LoopExtractor::ID = 0; INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", - "Extract loops into new functions", false, false) + "Extract loops into new functions", false, false) INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_END(LoopExtractor, "loop-extract", - "Extract loops into new functions", false, false) + "Extract loops into new functions", false, false) namespace { /// SingleLoopExtractor - For bugpoint. 
@@ -100,9 +101,9 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { L->getHeader()->getParent()->getEntryBlock().getTerminator(); if (!isa<BranchInst>(EntryTI) || !cast<BranchInst>(EntryTI)->isUnconditional() || - EntryTI->getSuccessor(0) != L->getHeader()) + EntryTI->getSuccessor(0) != L->getHeader()) { ShouldExtractLoop = true; - else { + } else { // Check to see if any exits from the loop are more than just return // blocks. SmallVector<BasicBlock*, 8> ExitBlocks; @@ -113,6 +114,21 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { break; } } + + if (ShouldExtractLoop) { + // We must omit landing pads. Landing pads must accompany the invoke + // instruction. But this would result in a loop in the extracted + // function. An infinite cycle occurs when it tries to extract that loop as + // well. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i]->isLandingPad()) { + ShouldExtractLoop = false; + break; + } + } + if (ShouldExtractLoop) { if (NumLoops == 0) return Changed; --NumLoops; @@ -149,6 +165,7 @@ namespace { /// BlocksToNotExtract list. class BlockExtractorPass : public ModulePass { void LoadFile(const char *Filename); + void SplitLandingPadPreds(Function *F); std::vector<BasicBlock*> BlocksToNotExtract; std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName; @@ -171,8 +188,7 @@ INITIALIZE_PASS(BlockExtractorPass, "extract-blocks", // createBlockExtractorPass - This pass extracts all blocks (except those // specified in the argument list) from the functions in the module. // -ModulePass *llvm::createBlockExtractorPass() -{ +ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorPass(); } @@ -194,6 +210,37 @@ void BlockExtractorPass::LoadFile(const char *Filename) { } } +/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke +/// instruction. The critical edge breaker will refuse to break critical edges +/// to a landing pad. So do them here. After this method runs, all landing pads +/// should have only one predecessor. +void BlockExtractorPass::SplitLandingPadPreds(Function *F) { + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + InvokeInst *II = dyn_cast<InvokeInst>(I); + if (!II) continue; + BasicBlock *Parent = II->getParent(); + BasicBlock *LPad = II->getUnwindDest(); + + // Look through the landing pad's predecessors. If one of them ends in an + // 'invoke', then we want to split the landing pad. + bool Split = false; + for (pred_iterator + PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) { + BasicBlock *BB = *PI; + if (BB->isLandingPad() && BB != Parent && + isa<InvokeInst>(Parent->getTerminator())) { + Split = true; + break; + } + } + + if (!Split) continue; + + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs); + } +} + bool BlockExtractorPass::runOnModule(Module &M) { std::set<BasicBlock*> TranslatedBlocksToNotExtract; for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { @@ -236,13 +283,21 @@ bool BlockExtractorPass::runOnModule(Module &M) { // Now that we know which blocks to not extract, figure out which ones we WANT // to extract. 
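// [Editorial aside, illustrative only, not part of the patch] The block
// extractor now keeps an invoke together with its landing pad: the loop below
// adds the terminator's unwind destination to the same extraction group, and
// SplitLandingPadPreds first gives every landing pad a single predecessor so
// that grouping stays well-defined. A condensed sketch of the grouping step
// (BB stands for the block being extracted):
//
//   SmallVector<BasicBlock*, 2> Group;
//   Group.push_back(BB);
//   if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
//     Group.push_back(II->getUnwindDest());   // pull the landing pad along
//   ExtractBasicBlock(Group);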
std::vector<BasicBlock*> BlocksToExtract; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + SplitLandingPadPreds(&*F); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (!TranslatedBlocksToNotExtract.count(BB)) BlocksToExtract.push_back(BB); + } - for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) - ExtractBasicBlock(BlocksToExtract[i]); + for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { + SmallVector<BasicBlock*, 2> BlocksToExtractVec; + BlocksToExtractVec.push_back(BlocksToExtract[i]); + if (const InvokeInst *II = + dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator())) + BlocksToExtractVec.push_back(II->getUnwindDest()); + ExtractBasicBlock(BlocksToExtractVec); + } return !BlocksToExtract.empty(); } diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp deleted file mode 100644 index 659476b..0000000 --- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp +++ /dev/null @@ -1,547 +0,0 @@ -//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the lowering of setjmp and longjmp to use the -// LLVM invoke and unwind instructions as necessary. -// -// Lowering of longjmp is fairly trivial. We replace the call with a -// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()". -// This unwinds the stack for us calling all of the destructors for -// objects allocated on the stack. -// -// At a setjmp call, the basic block is split and the setjmp removed. -// The calls in a function that have a setjmp are converted to invoke -// where the except part checks to see if it's a longjmp exception and, -// if so, if it's handled in the function. If it is, then it gets the -// value returned by the longjmp and goes to where the basic block was -// split. Invoke instructions are handled in a similar fashion with the -// original except block being executed if it isn't a longjmp except -// that is handled by that function. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// FIXME: This pass doesn't deal with PHI statements just yet. That is, -// we expect this to occur before SSAification is done. This would seem -// to make sense, but in general, it might be a good idea to make this -// pass invokable via the "opt" command at will. 
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lowersetjmp" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include <map> -using namespace llvm; - -STATISTIC(LongJmpsTransformed, "Number of longjmps transformed"); -STATISTIC(SetJmpsTransformed , "Number of setjmps transformed"); -STATISTIC(CallsTransformed , "Number of calls invokified"); -STATISTIC(InvokesTransformed , "Number of invokes modified"); - -namespace { - //===--------------------------------------------------------------------===// - // LowerSetJmp pass implementation. - class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> { - // LLVM library functions... - Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap - Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap - Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map - Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp - Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception - Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception - Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value - - typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair; - - // Keep track of those basic blocks reachable via a depth-first search of - // the CFG from a setjmp call. We only need to transform those "call" and - // "invoke" instructions that are reachable from the setjmp call site. - std::set<BasicBlock*> DFSBlocks; - - // The setjmp map is going to hold information about which setjmps - // were called (each setjmp gets its own number) and with which - // buffer it was called. - std::map<Function*, AllocaInst*> SJMap; - - // The rethrow basic block map holds the basic block to branch to if - // the exception isn't handled in the current function and needs to - // be rethrown. - std::map<const Function*, BasicBlock*> RethrowBBMap; - - // The preliminary basic block map holds a basic block that grabs the - // exception and determines if it's handled by the current function. - std::map<const Function*, BasicBlock*> PrelimBBMap; - - // The switch/value map holds a switch inst/call inst pair. The - // switch inst controls which handler (if any) gets called and the - // value is the value returned to that handler by the call to - // __llvm_sjljeh_get_longjmp_value. - std::map<const Function*, SwitchValuePair> SwitchValMap; - - // A map of which setjmps we've seen so far in a function. 
- std::map<const Function*, unsigned> SetJmpIDMap; - - AllocaInst* GetSetJmpMap(Function* Func); - BasicBlock* GetRethrowBB(Function* Func); - SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow); - - void TransformLongJmpCall(CallInst* Inst); - void TransformSetJmpCall(CallInst* Inst); - - bool IsTransformableFunction(StringRef Name); - public: - static char ID; // Pass identification, replacement for typeid - LowerSetJmp() : ModulePass(ID) { - initializeLowerSetJmpPass(*PassRegistry::getPassRegistry()); - } - - void visitCallInst(CallInst& CI); - void visitInvokeInst(InvokeInst& II); - void visitReturnInst(ReturnInst& RI); - void visitUnwindInst(UnwindInst& UI); - - bool runOnModule(Module& M); - bool doInitialization(Module& M); - }; -} // end anonymous namespace - -char LowerSetJmp::ID = 0; -INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false) - -// run - Run the transformation on the program. We grab the function -// prototypes for longjmp and setjmp. If they are used in the program, -// then we can go directly to the places they're at and transform them. -bool LowerSetJmp::runOnModule(Module& M) { - bool Changed = false; - - // These are what the functions are called. - Function* SetJmp = M.getFunction("llvm.setjmp"); - Function* LongJmp = M.getFunction("llvm.longjmp"); - - // This program doesn't have longjmp and setjmp calls. - if ((!LongJmp || LongJmp->use_empty()) && - (!SetJmp || SetJmp->use_empty())) return false; - - // Initialize some values and functions we'll need to transform the - // setjmp/longjmp functions. - doInitialization(M); - - if (SetJmp) { - for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); - B != E; ++B) { - BasicBlock* BB = cast<Instruction>(*B)->getParent(); - for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), - E = df_ext_end(BB, DFSBlocks); I != E; ++I) - /* empty */; - } - - while (!SetJmp->use_empty()) { - assert(isa<CallInst>(SetJmp->use_back()) && - "User of setjmp intrinsic not a call?"); - TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); - Changed = true; - } - } - - if (LongJmp) - while (!LongJmp->use_empty()) { - assert(isa<CallInst>(LongJmp->use_back()) && - "User of longjmp intrinsic not a call?"); - TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); - Changed = true; - } - - // Now go through the affected functions and convert calls and invokes - // to new invokes... - for (std::map<Function*, AllocaInst*>::iterator - B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { - Function* F = B->first; - for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) - for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { - visit(*IB++); - if (IB != BB->end() && IB->getParent() != BB) - break; // The next instruction got moved to a different block! - } - } - - DFSBlocks.clear(); - SJMap.clear(); - RethrowBBMap.clear(); - PrelimBBMap.clear(); - SwitchValMap.clear(); - SetJmpIDMap.clear(); - - return Changed; -} - -// doInitialization - For the lower long/setjmp pass, this ensures that a -// module contains a declaration for the intrisic functions we are going -// to call to convert longjmp and setjmp calls. -// -// This function is always successful, unless it isn't. -bool LowerSetJmp::doInitialization(Module& M) -{ - const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); - const Type *SBPPTy = PointerType::getUnqual(SBPTy); - - // N.B. 
See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for - // a description of the following library functions. - - // void __llvm_sjljeh_init_setjmpmap(void**) - InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", - Type::getVoidTy(M.getContext()), - SBPPTy, (Type *)0); - // void __llvm_sjljeh_destroy_setjmpmap(void**) - DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", - Type::getVoidTy(M.getContext()), - SBPPTy, (Type *)0); - - // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) - AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", - Type::getVoidTy(M.getContext()), - SBPPTy, SBPTy, - Type::getInt32Ty(M.getContext()), - (Type *)0); - - // void __llvm_sjljeh_throw_longjmp(int*, int) - ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", - Type::getVoidTy(M.getContext()), SBPTy, - Type::getInt32Ty(M.getContext()), - (Type *)0); - - // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) - TryCatchLJ = - M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", - Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0); - - // bool __llvm_sjljeh_is_longjmp_exception() - IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", - Type::getInt1Ty(M.getContext()), - (Type *)0); - - // int __llvm_sjljeh_get_longjmp_value() - GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", - Type::getInt32Ty(M.getContext()), - (Type *)0); - return true; -} - -// IsTransformableFunction - Return true if the function name isn't one -// of the ones we don't want transformed. Currently, don't transform any -// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error -// handling functions (beginning with __llvm_sjljeh_...they don't throw -// exceptions). -bool LowerSetJmp::IsTransformableFunction(StringRef Name) { - return !Name.startswith("__llvm_sjljeh_"); -} - -// TransformLongJmpCall - Transform a longjmp call into a call to the -// internal __llvm_sjljeh_throw_longjmp function. It then takes care of -// throwing the exception for us. -void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) -{ - const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); - - // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the - // same parameters as "longjmp", except that the buffer is cast to a - // char*. It returns "void", so it doesn't need to replace any of - // Inst's uses and doesn't get a name. - CastInst* CI = - new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); - Value *Args[] = { CI, Inst->getArgOperand(1) }; - CallInst::Create(ThrowLongJmp, Args, "", Inst); - - SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; - - // If the function has a setjmp call in it (they are transformed first) - // we should branch to the basic block that determines if this longjmp - // is applicable here. Otherwise, issue an unwind. - if (SVP.first) - BranchInst::Create(SVP.first->getParent(), Inst); - else - new UnwindInst(Inst->getContext(), Inst); - - // Remove all insts after the branch/unwind inst. Go from back to front to - // avoid replaceAllUsesWith if possible. - BasicBlock *BB = Inst->getParent(); - Instruction *Removed; - do { - Removed = &BB->back(); - // If the removed instructions have any users, replace them now. 
- if (!Removed->use_empty()) - Removed->replaceAllUsesWith(UndefValue::get(Removed->getType())); - Removed->eraseFromParent(); - } while (Removed != Inst); - - ++LongJmpsTransformed; -} - -// GetSetJmpMap - Retrieve (create and initialize, if necessary) the -// setjmp map. This map is going to hold information about which setjmps -// were called (each setjmp gets its own number) and with which buffer it -// was called. There can be only one! -AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) -{ - if (SJMap[Func]) return SJMap[Func]; - - // Insert the setjmp map initialization before the first instruction in - // the function. - Instruction* Inst = Func->getEntryBlock().begin(); - assert(Inst && "Couldn't find even ONE instruction in entry block!"); - - // Fill in the alloca and call to initialize the SJ map. - const Type *SBPTy = - Type::getInt8PtrTy(Func->getContext()); - AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); - CallInst::Create(InitSJMap, Map, "", Inst); - return SJMap[Func] = Map; -} - -// GetRethrowBB - Only one rethrow basic block is needed per function. -// If this is a longjmp exception but not handled in this block, this BB -// performs the rethrow. -BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) -{ - if (RethrowBBMap[Func]) return RethrowBBMap[Func]; - - // The basic block we're going to jump to if we need to rethrow the - // exception. - BasicBlock* Rethrow = - BasicBlock::Create(Func->getContext(), "RethrowExcept", Func); - - // Fill in the "Rethrow" BB with a call to rethrow the exception. This - // is the last instruction in the BB since at this point the runtime - // should exit this function and go to the next function. - new UnwindInst(Func->getContext(), Rethrow); - return RethrowBBMap[Func] = Rethrow; -} - -// GetSJSwitch - Return the switch statement that controls which handler -// (if any) gets called and the value returned to that handler. -LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, - BasicBlock* Rethrow) -{ - if (SwitchValMap[Func].first) return SwitchValMap[Func]; - - BasicBlock* LongJmpPre = - BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func); - - // Keep track of the preliminary basic block for some of the other - // transformations. - PrelimBBMap[Func] = LongJmpPre; - - // Grab the exception. - CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre); - - // The "decision basic block" gets the number associated with the - // setjmp call returning to switch on and the value returned by - // longjmp. - BasicBlock* DecisionBB = - BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func); - - BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); - - // Fill in the "decision" basic block. - CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB); - CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum", - DecisionBB); - - SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB); - return SwitchValMap[Func] = SwitchValuePair(SI, LJVal); -} - -// TransformSetJmpCall - The setjmp call is a bit trickier to transform. -// We're going to convert all setjmp calls to nops. Then all "call" and -// "invoke" instructions in the function are converted to "invoke" where -// the "except" branch is used when returning from a longjmp call. -void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) -{ - BasicBlock* ABlock = Inst->getParent(); - Function* Func = ABlock->getParent(); - - // Add this setjmp to the setjmp map. 
- const Type* SBPTy = - Type::getInt8PtrTy(Inst->getContext()); - CastInst* BufPtr = - new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst); - Value *Args[] = { - GetSetJmpMap(Func), BufPtr, - ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) - }; - CallInst::Create(AddSJToMap, Args, "", Inst); - - // We are guaranteed that there are no values live across basic blocks - // (because we are "not in SSA form" yet), but there can still be values live - // in basic blocks. Because of this, splitting the setjmp block can cause - // values above the setjmp to not dominate uses which are after the setjmp - // call. For all of these occasions, we must spill the value to the stack. - // - std::set<Instruction*> InstrsAfterCall; - - // The call is probably very close to the end of the basic block, for the - // common usage pattern of: 'if (setjmp(...))', so keep track of the - // instructions after the call. - for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end(); - I != E; ++I) - InstrsAfterCall.insert(I); - - for (BasicBlock::iterator II = ABlock->begin(); - II != BasicBlock::iterator(Inst); ++II) - // Loop over all of the uses of instruction. If any of them are after the - // call, "spill" the value to the stack. - for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (cast<Instruction>(U)->getParent() != ABlock || - InstrsAfterCall.count(cast<Instruction>(U))) { - DemoteRegToStack(*II); - break; - } - } - InstrsAfterCall.clear(); - - // Change the setjmp call into a branch statement. We'll remove the - // setjmp call in a little bit. No worries. - BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst); - assert(SetJmpContBlock && "Couldn't split setjmp BB!!"); - - SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont"); - - // Add the SetJmpContBlock to the set of blocks reachable from a setjmp. - DFSBlocks.insert(SetJmpContBlock); - - // This PHI node will be in the new block created from the - // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2, - "SetJmpReturn", Inst); - - // Coming from a call to setjmp, the return is 0. - PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), - ABlock); - - // Add the case for this setjmp's number... - SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); - SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), - SetJmpIDMap[Func] - 1), - SetJmpContBlock); - - // Value coming from the handling of the exception. - PHI->addIncoming(SVP.second, SVP.second->getParent()); - - // Replace all uses of this instruction with the PHI node created by - // the eradication of setjmp. - Inst->replaceAllUsesWith(PHI); - Inst->eraseFromParent(); - - ++SetJmpsTransformed; -} - -// visitCallInst - This converts all LLVM call instructions into invoke -// instructions. The except part of the invoke goes to the "LongJmpBlkPre" -// that grabs the exception and proceeds to determine if it's a longjmp -// exception or not. -void LowerSetJmp::visitCallInst(CallInst& CI) -{ - if (CI.getCalledFunction()) - if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || - CI.getCalledFunction()->isIntrinsic()) return; - - BasicBlock* OldBB = CI.getParent(); - - // If not reachable from a setjmp call, don't transform. 
- if (!DFSBlocks.count(OldBB)) return; - - BasicBlock* NewBB = OldBB->splitBasicBlock(CI); - assert(NewBB && "Couldn't split BB of \"call\" instruction!!"); - DFSBlocks.insert(NewBB); - NewBB->setName("Call2Invoke"); - - Function* Func = OldBB->getParent(); - - // Construct the new "invoke" instruction. - TerminatorInst* Term = OldBB->getTerminator(); - CallSite CS(&CI); - std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); - InvokeInst* II = - InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], - Params, CI.getName(), Term); - II->setCallingConv(CI.getCallingConv()); - II->setAttributes(CI.getAttributes()); - - // Replace the old call inst with the invoke inst and remove the call. - CI.replaceAllUsesWith(II); - CI.eraseFromParent(); - - // The old terminator is useless now that we have the invoke inst. - Term->eraseFromParent(); - ++CallsTransformed; -} - -// visitInvokeInst - Converting the "invoke" instruction is fairly -// straight-forward. The old exception part is replaced by a query asking -// if this is a longjmp exception. If it is, then it goes to the longjmp -// exception blocks. Otherwise, control is passed the old exception. -void LowerSetJmp::visitInvokeInst(InvokeInst& II) -{ - if (II.getCalledFunction()) - if (!IsTransformableFunction(II.getCalledFunction()->getName()) || - II.getCalledFunction()->isIntrinsic()) return; - - BasicBlock* BB = II.getParent(); - - // If not reachable from a setjmp call, don't transform. - if (!DFSBlocks.count(BB)) return; - - BasicBlock* ExceptBB = II.getUnwindDest(); - - Function* Func = BB->getParent(); - BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), - "InvokeExcept", Func); - - // If this is a longjmp exception, then branch to the preliminary BB of - // the longjmp exception handling. Otherwise, go to the old exception. - CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept", - NewExceptBB); - - BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB); - - II.setUnwindDest(NewExceptBB); - ++InvokesTransformed; -} - -// visitReturnInst - We want to destroy the setjmp map upon exit from the -// function. -void LowerSetJmp::visitReturnInst(ReturnInst &RI) { - Function* Func = RI.getParent()->getParent(); - CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI); -} - -// visitUnwindInst - We want to destroy the setjmp map upon exit from the -// function. -void LowerSetJmp::visitUnwindInst(UnwindInst &UI) { - Function* Func = UI.getParent()->getParent(); - CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI); -} - -ModulePass *llvm::createLowerSetJmpPass() { - return new LowerSetJmp(); -} - diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 7796d05..0b01c38 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -76,7 +76,7 @@ STATISTIC(NumDoubleWeak, "Number of new functions created"); /// functions that will compare equal, without looking at the instructions /// inside the function. static unsigned profileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); FoldingSetNodeID ID; ID.AddInteger(F->size()); @@ -185,7 +185,7 @@ private: } /// Compare two Types, treating all pointer types as equal. - bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; + bool isEquivalentType(Type *Ty1, Type *Ty2) const; // The two functions undergoing comparison. 
const Function *F1, *F2; @@ -200,8 +200,8 @@ private: // Any two pointers in the same address space are equivalent, intptr_t and // pointers are equivalent. Otherwise, standard type equivalence rules apply. -bool FunctionComparator::isEquivalentType(const Type *Ty1, - const Type *Ty2) const { +bool FunctionComparator::isEquivalentType(Type *Ty1, + Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { @@ -233,14 +233,14 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return true; case Type::PointerTyID: { - const PointerType *PTy1 = cast<PointerType>(Ty1); - const PointerType *PTy2 = cast<PointerType>(Ty2); + PointerType *PTy1 = cast<PointerType>(Ty1); + PointerType *PTy2 = cast<PointerType>(Ty2); return PTy1->getAddressSpace() == PTy2->getAddressSpace(); } case Type::StructTyID: { - const StructType *STy1 = cast<StructType>(Ty1); - const StructType *STy2 = cast<StructType>(Ty2); + StructType *STy1 = cast<StructType>(Ty1); + StructType *STy2 = cast<StructType>(Ty2); if (STy1->getNumElements() != STy2->getNumElements()) return false; @@ -255,8 +255,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, } case Type::FunctionTyID: { - const FunctionType *FTy1 = cast<FunctionType>(Ty1); - const FunctionType *FTy2 = cast<FunctionType>(Ty2); + FunctionType *FTy1 = cast<FunctionType>(Ty1); + FunctionType *FTy2 = cast<FunctionType>(Ty2); if (FTy1->getNumParams() != FTy2->getNumParams() || FTy1->isVarArg() != FTy2->isVarArg()) return false; @@ -272,8 +272,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, } case Type::ArrayTyID: { - const ArrayType *ATy1 = cast<ArrayType>(Ty1); - const ArrayType *ATy2 = cast<ArrayType>(Ty2); + ArrayType *ATy1 = cast<ArrayType>(Ty1); + ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } @@ -305,10 +305,14 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, // Check special state that is a part of some instructions. 
if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() && + LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && + LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() && + SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && + SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) @@ -317,22 +321,22 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { - if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) - if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { - if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) - if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) + return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) + return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) + return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && + FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) + return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && + CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() && + CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) + return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && + RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && + RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && + RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); return true; } @@ -346,9 +350,9 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end()); SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end()); uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), - Indices1.data(), Indices1.size()); + Indices1); uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), - Indices2.data(), Indices2.size()); + Indices2); return Offset1 == Offset2; } @@ -725,7 +729,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector<Value *, 16> Args; unsigned i = 0; - const 
FunctionType *FFTy = F->getFunctionType(); + FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i))); diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 0000000..8fdfd72 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,343 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#include "llvm-c/Transforms/PassManagerBuilder.h" + +#include "llvm/PassManager.h" +#include "llvm/DefaultPasses.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = 0; + Inliner = 0; + DisableSimplifyLibCalls = false; + DisableUnitAtATime = false; + DisableUnrollLoops = false; +} + +PassManagerBuilder::~PassManagerBuilder() { + delete LibraryInfo; + delete Inliner; +} + +/// Set of global extensions, automatically added as part of the standard set. +static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, + PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; + +void PassManagerBuilder::addGlobalExtension( + PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + GlobalExtensions->push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { + Extensions.push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, + PassManagerBase &PM) const { + for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) + if ((*GlobalExtensions)[i].first == ETy) + (*GlobalExtensions)[i].second(*this, PM); + for (unsigned i = 0, e = Extensions.size(); i != e; ++i) + if (Extensions[i].first == ETy) + Extensions[i].second(*this, PM); +} + +void +PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const { + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); +} + +void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) { + addExtensionsToPM(EP_EarlyAsPossible, FPM); + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo)); + + if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + FPM.add(createCFGSimplificationPass()); + FPM.add(createScalarReplAggregatesPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); +} + +void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { + // If all optimizations are disabled, just run the always-inline pass. + if (OptLevel == 0) { + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + if (!DisableUnitAtATime) + MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + if (!DisableSimplifyLibCalls) + MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + MPM.add(createLoopRotatePass()); // Rotate Loop + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createInstructionCombiningPass()); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) + MPM.add(createGVNPass()); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + MPM.add(createInstructionCombiningPass()); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Clean up after everything. 
+ + if (!DisableUnitAtATime) { + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O3 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 2) + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + + if (OptLevel > 1) + MPM.add(createConstantMergePass()); // Merge dup global constants + } +} + +void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, + bool Internalize, + bool RunInliner) { + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + // Now that composite has been compiled, scan through the module, looking + // for a main function. If main is defined, mark all other functions + // internal. + if (Internalize) + PM.add(createInternalizePass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createGlobalOptimizerPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + PM.add(createInstructionCombiningPass()); + + // Inline small functions + if (RunInliner) + PM.add(createFunctionInliningPass()); + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + PM.add(createInstructionCombiningPass()); + PM.add(createJumpThreadingPass()); + // Break up allocas + PM.add(createScalarReplAggregatesPass()); + + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createGlobalsModRefPass()); // IP alias analysis. + + PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createGVNPass()); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + // Nuke dead stores. + PM.add(createDeadStoreEliminationPass()); + + // Cleanup and simplify the code after the scalar optimizations. + PM.add(createInstructionCombiningPass()); + + PM.add(createJumpThreadingPass()); + + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Now that we have optimized the program, discard unreachable functions. 
+ PM.add(createGlobalDCEPass()); +} + +LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) { + PassManagerBuilder *PMB = new PassManagerBuilder(); + return wrap(PMB); +} + +void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + delete Builder; +} + +void +LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->OptLevel = OptLevel; +} + +void +LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->SizeLevel = SizeLevel; +} + +void +LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnitAtATime = Value; +} + +void +LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnrollLoops = Value; +} + +void +LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableSimplifyLibCalls = Value; +} + +void +LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, + unsigned Threshold) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->Inliner = createFunctionInliningPass(Threshold); +} + +void +LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM); + Builder->populateFunctionPassManager(*FPM); +} + +void +LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *MPM = unwrap(PM); + Builder->populateModulePassManager(*MPM); +} + +void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM, + bool Internalize, + bool RunInliner) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *LPM = unwrap(PM); + Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); +} + diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index b7e63dc..cbb80f0 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -101,8 +101,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // Check to see if this function performs an unwind or calls an // unwinding function. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) { - // Uses unwind! + if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) || + isa<ResumeInst>(BB->getTerminator()))) { + // Uses unwind / resume! 
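As a usage sketch for the PassManagerBuilder introduced above (not part of this change): a client would typically populate and run the pass managers roughly as follows. The opt level, the inline threshold, and the verifier pass registered at EP_LoopOptimizerEnd are illustrative choices, not values mandated by the code above.

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// Illustrative extension hook: run the verifier after the loop optimizers.
static void addVerifier(const PassManagerBuilder &, PassManagerBase &PM) {
  PM.add(createVerifierPass());
}

static void runStandardPipeline(Module &M) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;                               // roughly -O2
  Builder.Inliner = createFunctionInliningPass(225);  // threshold is illustrative
  Builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd, addVerifier);

  FunctionPassManager FPM(&M);
  PassManager MPM;
  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
    FPM.run(*F);
  FPM.doFinalization();
  MPM.run(M);
}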
SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) { SCCMightReturn = true; diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 0fbaff1..b5caa9a 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -180,7 +180,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) { for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; - if (STy->isAnonymous() || STy->getName().empty()) continue; + if (STy->isLiteral() || STy->getName().empty()) continue; if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) continue; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 8257d6b..3808278 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -103,7 +104,7 @@ public: // Instruction *visitAdd(BinaryOperator &I); Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); @@ -192,15 +193,16 @@ public: Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); + Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... Instruction *visitInstruction(Instruction &I) { return 0; } private: - bool ShouldChangeType(const Type *From, const Type *To) const; + bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -209,12 +211,13 @@ private: /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. 
bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + Type *Ty); Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); @@ -357,7 +360,7 @@ private: Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); + Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); }; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c36a955..d10046c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -188,7 +188,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateMul(LHS, AddOne(C2)); // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); @@ -401,7 +401,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { Value *InstCombiner::EmitGEPOffset(User *GEP) { TargetData &TD = *getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will @@ -420,7 +420,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { if (OpC->isZero()) continue; // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); if (Size) @@ -460,7 +460,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 
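A standalone check of the "A+B --> A|B iff A and B have no bits set in common" fold used in visitAdd above: with no common set bits there are no carries, so addition and bitwise-or coincide (plain C++, independent of LLVM).

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      if ((a & b) == 0)            // no bits in common, so no carries
        assert(a + b == (a | b));
  return 0;
}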
/// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { + Type *Ty) { assert(TD && "Must have target data info for this"); // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 64ea36f..5e0bfe8 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1174,30 +1174,31 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast<BinaryOperator>(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); + // A&(A^B) => A & ~B + { + Value *tmpOp0 = Op0; + Value *tmpOp1 = Op1; + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1 ) { + tmpOp1 = Op0; + tmpOp0 = Op1; + // Simplify below + } } - } - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast<BinaryOperator>(Op1)->swapOperands(); - std::swap(A, B); + if (tmpOp1->hasOneUse() && + match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == tmpOp0) { + std::swap(A, B); + } + // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if + // A is originally -1 (or a vector of -1 and undefs), then we enter + // an endless loop. By checking that A is non-constant we ensure that + // we will never get to the loop. + if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } - // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if - // A is originally -1 (or a vector of -1 and undefs), then we enter - // an endless loop. By checking that A is non-constant we ensure that - // we will never get to the loop. - if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } // (A&((~A)|B)) -> A&B @@ -1224,7 +1225,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { @@ -2008,7 +2009,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { CastInst *Op1C = dyn_cast<CastInst>(Op1); if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 
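Likewise, a standalone check of the identity behind the A&(A^B) => A & ~B rewrite above: per bit, if A is 0 both sides are 0, and if A is 1 then A^B equals ~B.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      assert((a & (a ^ b)) == (a & ~b));
  return 0;
}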
- const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); @@ -2227,14 +2228,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); if (B == Op1 && // (B&A)^A == ~B & A !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); + return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); } } } @@ -2288,7 +2289,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 537f2b3..c7b3ff8 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -22,8 +21,8 @@ using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { +static Type *getPromotedType(Type *Ty) { + if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { if (ITy->getBitWidth() < 32) return Type::getInt32Ty(Ty->getContext()); } @@ -64,7 +63,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned DstAddrSp = cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); - const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); @@ -76,18 +75,18 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); if (StrippedDest != MI->getArgOperand(0)) { - const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. 
while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { + if (StructType *STy = dyn_cast<StructType>(SrcETy)) { if (STy->getNumElements() == 1) SrcETy = STy->getElementType(0); else break; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { if (ATy->getNumElements() == 1) SrcETy = ATy->getElementType(); else @@ -142,7 +141,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); @@ -250,7 +249,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We need target data for just about everything so depend on it. if (!TD) break; - const Type *ReturnTy = CI.getType(); + Type *ReturnTy = CI.getType(); uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; // Get to the real allocated thing and offset as fast as possible. @@ -266,8 +265,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Get the current byte offset into the thing. Use the original // operand in case we're looking through a bitcast. SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), - Ops.data(), Ops.size()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); Op1 = GEP->getPointerOperand()->stripPointerCasts(); @@ -300,7 +298,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } else if (CallInst *MI = extractMallocCall(Op1)) { // Get allocation size. - const Type* MallocType = getMallocAllocatedType(MI); + Type* MallocType = getMallocAllocatedType(MI); if (MallocType && MallocType->isSized()) if (Value *NElems = getMallocArraySize(MI, TD, true)) if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) @@ -355,7 +353,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); // FIXME: Try to simplify vectors of integers. if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); @@ -374,7 +372,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. - const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); // FIXME: Try to simplify vectors of integers. 
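To make the cttz/ctlz reasoning above concrete (a standalone sketch, independent of the known-bits machinery): once the bits below the lowest known-one bit are all known zero, the trailing-zero count is the same for every value consistent with that knowledge, so the intrinsic folds to a constant.

#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t v) {
  unsigned n = 0;
  while ((v & 1) == 0) { v >>= 1; ++n; }
  return n;
}

int main() {
  // Suppose analysis proves bit 3 is one and bits 0..2 are zero; then cttz is
  // always 3, no matter what the remaining (unknown) bits turn out to be.
  for (uint32_t high = 0; high < 256; ++high) {
    uint32_t v = (high << 4) | 0x8u;
    assert(cttz32(v) == 3);
  }
  return 0;
}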
if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); @@ -392,7 +390,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -416,7 +414,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getTrue(II->getContext()) }; - const StructType *ST = cast<StructType>(II->getType()); + StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } @@ -430,7 +428,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getFalse(II->getContext()) }; - const StructType *ST = cast<StructType>(II->getType()); + StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } @@ -559,7 +557,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { - const Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); @@ -570,7 +568,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { - const Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); @@ -656,15 +654,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (ExtractedElts[Idx] == 0) { ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(II->getContext()), - Idx&15, false), "tmp"); + Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->getInt32(Idx&15)); } // Insert this value into the result vector. Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(II->getContext()), - i, false), "tmp"); + Builder->getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -733,9 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } - // If the stack restore is in a return/unwind block and if there are no - // allocas or calls between the restore and the return, nuke the restore. - if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI))) + // If the stack restore is in a return, resume, or unwind block and if there + // are no allocas or calls between the restore and the return, nuke the + // restore. 
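Similarly, the arithmetic fact behind the uadd.with.overflow case above: when the sign bit of both operands is known zero, the unsigned sum cannot wrap, so the overflow result is constant false and the intrinsic reduces to a plain add.

#include <cassert>
#include <cstdint>

int main() {
  // With both top bits clear, each operand is <= 0x7FFFFFFF, so the sum is
  // <= 0xFFFFFFFE and fits in 32 bits without wrapping.
  for (uint64_t a = 0; a <= 0x7FFFFFFFull; a += 0x01234567ull)
    for (uint64_t b = 0; b <= 0x7FFFFFFFull; b += 0x0FEDCBA9ull)
      assert(a + b <= 0xFFFFFFFFull);
  return 0;
}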
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || + isa<UnwindInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -765,9 +763,9 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, if (!CS.paramHasAttr(ix, Attribute::ByVal)) return true; - const Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); - const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); + Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) @@ -820,6 +818,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { return Simplifier.NewInstruction; } +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. 
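FindInitTrampolineFromBB above walks backwards from the adjust.trampoline call and accepts an init.trampoline on the same memory only if no intervening instruction may write to memory. A simplified model of that scan, with invented Inst and Kind types standing in for LLVM instructions:

    #include <vector>
    #include <cstddef>

    enum class Kind { InitTrampoline, MayWrite, Other };
    struct Inst { Kind K; int Slot; };   // Slot: which trampoline buffer it touches

    // Return the index of the init.trampoline that reaches AdjustIdx, or -1
    // if a potentially clobbering write is seen first.
    int findInit(const std::vector<Inst> &BB, std::size_t AdjustIdx, int Slot) {
      for (std::size_t I = AdjustIdx; I > 0; ) {
        const Inst &Prev = BB[--I];
        if (Prev.K == Kind::InitTrampoline && Prev.Slot == Slot)
          return static_cast<int>(I);
        if (Prev.K == Kind::MayWrite)
          return -1;                     // the slot may have been overwritten
      }
      return -1;
    }

The sketch tests for the matching init.trampoline before the may-write bail-out, mirroring the real loop, since init.trampoline itself writes the buffer it initializes.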
// Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -879,13 +954,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); // See if we can optimize any arguments passed through the varargs area of @@ -934,9 +1007,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // would cause a type conversion of one of our arguments, change this call to // be a direct call with arguments casted to the appropriate types. // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); + FunctionType *FT = Callee->getFunctionType(); + Type *OldRetTy = Caller->getType(); + Type *NewRetTy = FT->getReturnType(); if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. @@ -982,8 +1055,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallSite::arg_iterator AI = CS.arg_begin(); for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - const Type *ActTy = (*AI)->getType(); + Type *ParamTy = FT->getParamType(i); + Type *ActTy = (*AI)->getType(); if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. @@ -995,11 +1068,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { - const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; - const Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) return false; @@ -1023,7 +1096,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the callee is just a declaration, don't change the varargsness of the // call. We don't want to introduce a varargs call where one doesn't // already exist. 
- const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); + PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) return false; } @@ -1062,13 +1135,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); + Type *ParamTy = FT->getParamType(i); if ((*AI)->getType() == ParamTy) { Args.push_back(*AI); } else { Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, ParamTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy)); } // Add any parameter attributes. @@ -1089,12 +1162,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } else { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { - const Type *PTy = getPromotedType((*AI)->getType()); + Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy)); } else { Args.push_back(*AI); } @@ -1138,13 +1211,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!NV->getType()->isVoidTy()) { Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, OldRetTy, false); - NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); + NV = NC = CastInst::Create(opcode, NC, OldRetTy); NC->setDebugLoc(Caller->getDebugLoc()); // If this is an invoke instruction, we should insert it after the first // non-phi, instruction in the normal successor block. if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { - BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); + BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); InsertNewInstBefore(NC, *I); } else { // Otherwise, it's a call, just insert cast right after the call. @@ -1163,13 +1236,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. 
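The varargs branch of transformConstExprCastCall above widens each extra argument to getPromotedType (sub-32-bit integers become i32) before it is passed through the va_arg area. A standalone C++ analogue of the rule the callee relies on, using the C default argument promotions (a short is read back as int):

    #include <cstdarg>
    #include <cassert>

    static int firstVarArgAsInt(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int v = va_arg(ap, int);   // a short argument arrives promoted to int
      va_end(ap);
      return v;
    }

    int main() {
      short s = 1234;
      assert(firstVarArgAsInt(1, s) == 1234);
      return 0;
    }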
// -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const AttrListPtr &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - @@ -1177,12 +1253,12 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); - const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); - const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 82c734e..f10e48a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -79,14 +80,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // This requires TargetData to get the alloca alignment and size information. if (!TD) return 0; - const PointerType *PTy = cast<PointerType>(CI.getType()); + PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); + Type *AllocElTy = AI.getAllocatedType(); + Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); @@ -121,13 +122,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, } else { Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale); // Insert before the alloca, not before the cast. 
- Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); + Amt = AllocaBuilder.CreateMul(Amt, NumElements); } if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); - Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); + Amt = AllocaBuilder.CreateAdd(Amt, Off); } AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); @@ -151,7 +152,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -229,12 +230,12 @@ static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction + Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above + Type *SrcTy = CI->getOperand(0)->getType(); // A from above + Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); @@ -260,7 +261,7 @@ isEliminableCastPair( /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, - const Type *Ty) { + Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; @@ -324,7 +325,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, const Type *Ty) { +static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; @@ -332,7 +333,7 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) { Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const Type *OrigTy = V->getType(); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. @@ -435,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + Type *DestTy = CI.getType(), *SrcTy = Src->getType(); // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -456,7 +457,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. 
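PromoteCastOfAllocation, whose hunks appear above, rewrites an alloca that is only used through a bitcast by rescaling its element count so the total allocation size is unchanged. A small sketch of the arithmetic in the simplest case, with hypothetical sizes and the exact-scale requirement the fold checks for:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t allocElSize = 8;   // size of the allocated element type (assumed)
      uint64_t castElSize  = 4;   // size of the type the alloca is cast to (assumed)
      uint64_t arraySize   = 10;  // original alloca element count

      assert(allocElSize % castElSize == 0);       // the fold bails out otherwise
      uint64_t scale  = allocElSize / castElSize;
      uint64_t newAmt = arraySize * scale;         // corresponds to the CreateMul above

      assert(newAmt * castElSize == arraySize * allocElSize); // same total bytes
      return 0;
    }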
if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(Src->getType(), 1); - Src = Builder->CreateAnd(Src, One, "tmp"); + Src = Builder->CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -518,7 +519,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In->getType()->getScalarSizeInBits()-1); In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); + In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); @@ -572,7 +573,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, "tmp"); + In = Builder->CreateXor(In, One); } if (CI.getType() == In->getType()) @@ -586,7 +587,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // It is also profitable to transform icmp eq into not(xor(A, B)) because that // may lead to additional simplifications. if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { - if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -644,7 +645,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { +static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; @@ -758,7 +759,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. 
Only do this if the dest type is a simple type, don't convert the @@ -820,7 +821,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); + Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -867,7 +868,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *TI0 = TI->getOperand(0); if (TI0->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); + Value *NewAnd = Builder->CreateAnd(TI0, ZC); return BinaryOperator::CreateXor(NewAnd, ZC); } } @@ -900,7 +901,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Op0->getType()->getScalarSizeInBits()-1); Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); if (Pred == ICmpInst::ICMP_SGT) In = Builder->CreateNot(In, In->getName()+".not"); @@ -965,10 +966,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. - if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && Op0->getType() == CI.getType()) { - const Type *EltTy = VTy->getElementType(); + Type *EltTy = VTy->getElementType(); // splat the shift constant to a constant vector. Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); @@ -988,7 +989,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateSExtd(Value *V, const Type *Ty) { +static bool CanEvaluateSExtd(Value *V, Type *Ty) { assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && "Can't sign extend type to a smaller type"); // If this is a constant, it can be trivially promoted. @@ -1063,7 +1064,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. 
Only do this if the dest type is a simple type, don't convert the @@ -1192,7 +1193,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - const Type *SrcTy = OpI->getType(); + Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && @@ -1306,13 +1307,13 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } if (CI.getOperand(0)->getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } } @@ -1351,7 +1352,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); - const Type *GEPIdxTy = + Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { @@ -1359,9 +1360,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, - NewIndices.begin(), NewIndices.end()) : - Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1382,14 +1382,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (TD) { if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); + TD->getIntPtrType(CI.getContext())); return new TruncInst(P, CI.getType()); } if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); + TD->getIntPtrType(CI.getContext())); return new ZExtInst(P, CI.getType()); } } @@ -1402,12 +1400,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { /// replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, +static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. 
- const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + VectorType *SrcTy = cast<VectorType>(InVal->getType()); if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the @@ -1427,7 +1425,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, // size of the input. SmallVector<Constant*, 16> ShuffleMask; Value *V2; - const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low @@ -1453,11 +1451,11 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); } -static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) { +static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { return Value % Ty->getPrimitiveSizeInBits() == 0; } -static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { +static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { return Value / Ty->getPrimitiveSizeInBits(); } @@ -1471,7 +1469,7 @@ static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { /// filling in Elements with the elements found here. static bool CollectInsertionElements(Value *V, unsigned ElementIndex, SmallVectorImpl<Value*> &Elements, - const Type *VecEltTy) { + Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; @@ -1508,7 +1506,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); - const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), @@ -1572,7 +1570,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { - const VectorType *DestVecTy = cast<VectorType>(CI.getType()); + VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); @@ -1599,7 +1597,7 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(); + Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. 
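CollectInsertionElements above splits a constant integer into vector-element-sized pieces with an lshr and trunc per element, element 0 coming from the low bits (a little-endian lane layout assumption). A standalone version of that decomposition and its inverse:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t val = 0xAABBCCDDu;
      uint8_t elt[4];
      for (unsigned i = 0; i != 4; ++i)
        elt[i] = static_cast<uint8_t>(val >> (i * 8));   // lshr + trunc per element

      uint32_t rebuilt = 0;
      for (unsigned i = 0; i != 4; ++i)
        rebuilt |= static_cast<uint32_t>(elt[i]) << (i * 8);

      assert(elt[0] == 0xDD && elt[3] == 0xAA);
      assert(rebuilt == val);   // reassembling the pieces is lossless
      return 0;
    }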
@@ -1607,7 +1605,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa<VectorType>(VecInput->getType())) { - const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + VectorType *VecTy = cast<VectorType>(VecInput->getType()); unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { @@ -1628,7 +1626,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), m_ConstantInt(ShAmt)))) && isa<VectorType>(VecInput->getType())) { - const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + VectorType *VecTy = cast<VectorType>(VecInput->getType()); unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && ShAmt->getZExtValue() % DestWidth == 0) { @@ -1651,18 +1649,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); + Type *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); // Get rid of casts from one type to the same type. These are useless and can // be replaced by the operand. if (DestTy == Src->getType()) return ReplaceInstUsesWith(CI, Src); - if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { - const PointerType *SrcPTy = cast<PointerType>(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); + if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { + PointerType *SrcPTy = cast<PointerType>(SrcTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. @@ -1693,7 +1691,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. 
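The visitBitCast fold in the surrounding hunks replaces a pointer bitcast with a getelementptr of leading zero indices when the destination element type can be reached by walking into leading struct or array members. The fold is sound because a leading zero index never moves the pointer, in the same way the first field of a standard-layout struct sits at offset 0 (the types below are invented for illustration):

    #include <cassert>
    #include <cstddef>

    struct Inner { int X; };
    struct Outer { Inner In; double D; };

    int main() {
      static_assert(offsetof(Outer, In) == 0, "first member is at offset 0");
      Outer O{};
      // &O and &O.In.X name the same address, which is what
      // "gep %O, 0, 0, 0" expresses without any bitcast.
      assert(static_cast<void *>(&O) == static_cast<void *>(&O.In.X));
      return 0;
    }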
if (SrcElTy == DstElTy) { SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end()); + return GetElementPtrInst::CreateInBounds(Src, Idxs); } } @@ -1702,7 +1700,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) return I; - if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, @@ -1731,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src, diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c78760b..bb1cbfa 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" @@ -56,7 +57,7 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getAdd(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasAddOverflow(ExtractElement(Result, Idx), @@ -78,7 +79,7 @@ static bool HasSubOverflow(ConstantInt *Result, bool IsSigned) { if (!IsSigned) return Result->getValue().ugt(In1->getValue()); - + if (In2->isNegative()) return Result->getValue().slt(In1->getValue()); @@ -91,7 +92,7 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getSub(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasSubOverflow(ExtractElement(Result, Idx), @@ -128,7 +129,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; return RHS->isMaxValue(true); - case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; return RHS->getValue().isSignBit(); @@ -143,7 +144,7 @@ static bool isHighOnes(const ConstantInt *CI) { return (~CI->getValue() + 1).isPowerOf2(); } -/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a /// set of known zero and one bits, compute the maximum and minimum values that /// could have the specified known zero and known one bits, returning them in /// 
min/max. @@ -160,7 +161,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, // bit if it is unknown. Min = KnownOne; Max = KnownOne|UnknownBits; - + if (UnknownBits.isNegative()) { // Sign bit is unknown Min.setBit(Min.getBitWidth()-1); Max.clearBit(Max.getBitWidth()-1); @@ -179,7 +180,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, KnownZero.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); APInt UnknownBits = ~(KnownZero|KnownOne); - + // The minimum value is when the unknown bits are all zeros. Min = KnownOne; // The maximum value is when the unknown bits are all ones. @@ -201,10 +202,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - + ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); if (Init == 0 || Init->getNumOperands() > 1024) return 0; - + // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // @@ -219,31 +220,31 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // type they index. Collect the indices. This is typically for arrays of // structs. SmallVector<unsigned, 4> LaterIndices; - - const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + + Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. - + uint64_t IdxVal = Idx->getZExtValue(); if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast<StructType>(EltTy)) + + if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { if (IdxVal >= ATy->getNumElements()) return 0; EltTy = ATy->getElementType(); } else { return 0; // Unknown type. } - + LaterIndices.push_back(IdxVal); } - + enum { Overdefined = -3, Undefined = -2 }; // Variables for our state machines. - + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form // "i == 47 | i == 87", where 47 is the first index the condition is true for, // and 87 is the second (and last) index. FirstTrueElement is -2 when @@ -254,7 +255,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the // form "i != 47 & i != 87". Same state transitions as for true elements. int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these /// define a state machine that triggers for ranges of values that the index /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. @@ -262,25 +263,25 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// index in the range (inclusive). We use -2 for undefined here because we /// use relative comparisons and don't want 0-1 to match -1. int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - + // MagicBitvector - This is a magic bitvector where we set a bit if the // comparison is true for element 'i'. 
If there are 64 elements or less in // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - - + + // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { Constant *Elt = Init->getOperand(i); - + // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); - + // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - + // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, CompareRHS, TD); @@ -294,15 +295,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = i; continue; } - + // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. if (!isa<ConstantInt>(C)) return 0; - + // Otherwise, we know if the comparison is true or false for this element, // update our state machines. bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); - + // State machine for single/double/range index comparison. if (IsTrueForElt) { // Update the TrueElement state machine. @@ -314,7 +315,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondTrueElement = i; else SecondTrueElement = Overdefined; - + // Update range state machine. if (TrueRangeEnd == (int)i-1) TrueRangeEnd = i; @@ -331,7 +332,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondFalseElement = i; else SecondFalseElement = Overdefined; - + // Update range state machine. if (FalseRangeEnd == (int)i-1) FalseRangeEnd = i; @@ -339,12 +340,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = Overdefined; } } - - + + // If this element is in range, update our magic bitvector. if (i < 64 && IsTrueForElt) MagicBitvector |= 1ULL << i; - + // If all of our states become overdefined, bail out early. Since the // predicate is expensive, only check it every 8 elements. This is only // really useful for really huge arrays. @@ -364,20 +365,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - + // True for one element -> 'i == 47'. if (SecondTrueElement == Undefined) return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - + // True for two elements -> 'i == 47 | i == 72'. Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); @@ -391,36 +392,36 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // None false -> true. if (FirstFalseElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); // False for one element -> 'i != 47'. 
if (SecondFalseElement == Undefined) return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - + // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } - + // If the comparison can be replaced with a range comparison for the elements // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), TrueRangeEnd-FirstTrueElement+1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } - + // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); @@ -429,19 +430,19 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), FalseRangeEnd-FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } - - + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 if (Init->getNumOperands() <= 32 || (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; + Type *Ty; if (Init->getNumOperands() <= 32) Ty = Type::getInt32Ty(Init->getContext()); else @@ -451,7 +452,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } - + return 0; } @@ -465,11 +466,11 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. -/// +/// static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - + // Check to see if this gep only has a single variable index. If so, and if // any constant indices are a multiple of its scale, then we can compute this // in terms of the scale of the variable index. For example, if the GEP @@ -481,9 +482,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -494,33 +495,33 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { break; } } - + // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. 
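FoldCmpLoadFromIndexedGlobal above ends by packing the per-element comparison results of a small constant array into a magic bitvector, so that "load table[i], compare" becomes "((magic_cst >> i) & 1) != 0". A standalone demonstration of the trick on an invented 8-entry table:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int table[8] = {1, 42, 7, 42, 0, 3, 42, 9};   // hypothetical data

      uint64_t magic = 0;
      for (unsigned i = 0; i != 8; ++i)
        if (table[i] == 42)
          magic |= 1ULL << i;            // bit i records "comparison true at i"

      for (unsigned i = 0; i != 8; ++i)
        assert((((magic >> i) & 1) != 0) == (table[i] == 42));
      return 0;
    }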
if (i == e) return 0; - + Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is // 4 if the variable index is into an array of i32. uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - + // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!CI) return 0; - + // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*CI->getSExtValue(); } } - + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. @@ -530,19 +531,19 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } - + // Otherwise, there is an index. The computation we will do will be modulo // the pointer size, so get it. uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - + Offset &= PtrSizeMask; VariableScale &= PtrSizeMask; - + // To do this transformation, any constant index must be a multiple of the // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a @@ -550,9 +551,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) return 0; - + // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -576,7 +577,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); - + // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS); @@ -686,7 +687,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, bool isTrue = ICmpInst::isTrueWhenEqual(Pred); return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); } - + // (X+4) == X -> false. if (Pred == ICmpInst::ICMP_EQ) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); @@ -698,22 +699,22 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, // so the values can never be equal. Similarly for all other "or equals" // operators. 
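EvaluateGEPOffsetExpression above divides the affine offset through by the scale of the single variable index, turning an expression like "12 + 4*i" into "3 + i". That is valid for the comparisons it feeds because the scale is positive and the constant part is an exact multiple of it, so comparisons against zero are preserved. A quick standalone check over a range of index values:

    #include <cassert>

    int main() {
      for (long i = -1000; i <= 1000; ++i) {
        long full   = 12 + 4 * i;   // offset in bytes
        long scaled = 3 + i;        // offset in elements
        assert((full == 0) == (scaled == 0));
        assert((full <  0) == (scaled <  0));
      }
      return 0;
    }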
- + // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - Value *R = + Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); } - + // (X+1) >u X --> X <u (0-1) --> X != 255 // (X+2) >u X --> X <u (0-2) --> X <u 254 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), APInt::getSignedMaxValue(BitWidth)); @@ -726,14 +727,14 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -745,14 +746,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS) { ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide + + // FIXME: If the operand types don't match the type of the divide // then don't attempt this transform. The code below doesn't have the // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different + // vice versa). This is because (x /s C1) <s C2 produces different // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. :( The if statement below tests that condition and bails + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. :( The if statement below tests that condition and bails // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) @@ -768,14 +769,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. + // as in the LHS instruction that we're folding. 
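The (X+C) <u X family of rewrites listed at the top of this hunk relies on the fact that, for a nonzero constant C, the unsigned add wraps exactly when X >u MAXUINT-C. An exhaustive 8-bit check of that equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned c = 1; c < 256; ++c)        // C != 0, as the comment requires
        for (unsigned x = 0; x < 256; ++x) {
          bool lhs = static_cast<uint8_t>(x + c) < x;   // (X+C) <u X with 8-bit wrap
          bool rhs = x > 255u - c;                      // X >u (MAXUINT - C)
          assert(lhs == rhs);
        }
      return 0;
    }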
bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; @@ -785,9 +786,9 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, /// If the division is known to be exact, then there is no remainder from the /// divide, so the covered range size is unit, otherwise it is the divisor. ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; - + // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // Compute this interval based on the constants involved and the signedness of // the compare/divide. This computes a half-open interval, keeping track of // whether either value in the interval overflows. After analysis each @@ -805,7 +806,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // to the same result value. HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); } - + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) @@ -848,7 +849,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, if (!HiOverflow) HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } - + // Dividing by a negative swaps the condition. LT <-> GT Pred = ICmpInst::getSwappedPredicate(Pred); } @@ -901,7 +902,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, ConstantInt *ShAmt) { const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. @@ -909,48 +910,48 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) return 0; - + if (!ICI.isEquality()) { // If we have an unsigned comparison and an ashr, we can't simplify this. // Similarly for signed comparisons with lshr. if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) return 0; - + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv // by a power of 2. Since we already have logic to simplify these, // transform to div and then simplify the resultant comparison. if (Shr->getOpcode() == Instruction::AShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) return 0; - + // Revisit the shift (to delete it). Worklist.Add(Shr); - + Constant *DivCst = ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - + Value *Tmp = Shr->getOpcode() == Instruction::AShr ? Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); - + ICI.setOperand(0, Tmp); - + // If the builder folded the binop, just return it. BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); if (TheDiv == 0) return &ICI; - + // Otherwise, fold this div/compare. 
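The shift and divide compares handled in this hunk are interchangeable with a range test: FoldICmpShrCst can turn a shift compare into a udiv/sdiv compare, and FoldICmpDivCst turns the divide compare into a range check (the comment's "X /u 5 == 0" is really checking that X is in [0, 5)). An exhaustive 8-bit check of that chain for (X >> 3) == 5:

    #include <cassert>

    int main() {
      for (unsigned x = 0; x < 256; ++x) {
        bool viaShift = (x >> 3) == 5;          // original comparison
        bool viaDiv   = (x / 8) == 5;           // lshr expressed as udiv
        bool viaRange = x >= 40 && x < 48;      // divide compare as a range test
        assert(viaShift == viaDiv);
        assert(viaDiv == viaRange);
      }
      return 0;
    }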
assert(TheDiv->getOpcode() == Instruction::SDiv || TheDiv->getOpcode() == Instruction::UDiv); - + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); assert(Res && "This div/cst should have folded!"); return Res; } - - + + // If we are comparing against bits always shifted out, the // comparison cannot succeed. APInt Comp = CmpRHSV << ShAmtVal; @@ -959,25 +960,25 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, Comp = Comp.lshr(ShAmtVal); else Comp = Comp.ashr(ShAmtVal); - + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + // Otherwise, check to see if the bits shifted out are known to be zero. // If so, we can compare against the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. if (Shr->hasOneUse() && Shr->isExact()) return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); - + if (Shr->hasOneUse()) { // Otherwise strength reduce the shift into an and. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - + Value *And = Builder->CreateAnd(Shr->getOperand(0), Mask, Shr->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); @@ -992,7 +993,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHSI, ConstantInt *RHS) { const APInt &RHSV = RHS->getValue(); - + switch (LHSI->getOpcode()) { case Instruction::Trunc: if (ICI.isEquality() && LHSI->hasOneUse()) { @@ -1003,7 +1004,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - + // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. @@ -1014,7 +1015,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } break; - + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), @@ -1022,7 +1023,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - + // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. if (!XorCST->isNegative()) { @@ -1030,13 +1031,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Worklist.Add(LHSI); return &ICI; } - + // Was the old condition true if the operand is positive? bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - + // If so, the new one isn't. 
isTrueIfPositive ^= true; - + if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); @@ -1075,13 +1076,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); - + // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value // produced, eliminating a cast. if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. + // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. if (ICI.isEquality() || @@ -1098,7 +1099,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If the LHS is an AND of a zext, and we have an equality compare, we can // shrink the and/compare to the smaller type, eliminating the cast. if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { - const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); // Make sure we don't compare the upper bits, SimplifyDemandedBits // should fold the icmp to true/false in that case. if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { @@ -1118,12 +1119,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) Shift = 0; - + ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + // We can fold this as long as we can't shift unknown bits // into the mask. This can only happen with signed shift // rights, as they sign-extend. @@ -1134,20 +1135,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; } - + if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); - + // Check to see if we are shifting out any of the bits being // compared. if (ConstantExpr::get(Shift->getOpcode(), @@ -1175,7 +1176,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is // preferable because it allows the C<<Y expression to be hoisted out // of a loop if Y is invariant and X is not. @@ -1185,21 +1186,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Compute C << Y. 
Value *NS; if (Shift->getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateShl(AndCST, Shift->getOperand(1)); } else { // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); } - + // Compute X & (C << Y). - Value *NewAnd = + Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - + ICI.setOperand(0, NewAnd); return &ICI; } } - + // Try to optimize things like "A[i]&42 == 0" to index computations. if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { if (GetElementPtrInst *GEP = @@ -1234,19 +1235,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; - + uint32_t TypeBits = RHSV.getBitWidth(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. if (ShAmt->uge(TypeBits)) break; - + if (ICI.isEquality()) { // If we are comparing against bits always shifted out, the // comparison cannot succeed. @@ -1259,34 +1260,34 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + // If the shift is NUW, then it is just shifting out zeros, no need for an // AND. if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); - + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - + Value *And = Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ConstantExpr::getLShr(RHS, ShAmt)); } } - + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), - APInt::getOneBitSet(TypeBits, + APInt::getOneBitSet(TypeBits, TypeBits-ShAmt->getZExtValue()-1)); Value *And = Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); @@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) case Instruction::AShr: { // Handle equality comparisons of shift-by-constant. @@ -1312,13 +1313,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::SDiv: case Instruction::UDiv: // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. 
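A minimal sketch of the equality-of-shift fold handled just above, checked at the C++ level rather than on IR: for (X << ShAmt) == C where the comparison can still succeed, the compare may be rewritten against C >> ShAmt once the bits the shift would discard are masked off. The constants and helper names below are illustrative only, assuming 32-bit unsigned arithmetic; this is not code from the patch.

#include <cassert>
#include <cstdint>

// Original form of the compare: (X << 3) == 40.
static bool shlEqOriginal(uint32_t X) { return (X << 3) == 40u; }

// Folded form: mask off the 3 bits the shift discards (APInt::getLowBitsSet(32, 29)),
// then compare against 40 >> 3.
static bool shlEqFolded(uint32_t X) { return (X & 0x1FFFFFFFu) == (40u >> 3); }

int main() {
  for (uint32_t X = 0; X < (1u << 20); ++X)   // spot-check a slice of the input space
    assert(shlEqOriginal(X) == shlEqFolded(X));
  return 0;
}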
If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) @@ -1357,12 +1358,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. if (ICI.isEquality()) { bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and // the second operand is a constant, simplify a bit. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { switch (BO->getOpcode()) { @@ -1389,7 +1390,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); if (Value *NegVal = dyn_castNegVal(BOp0)) @@ -1432,11 +1433,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); } break; - + case Instruction::And: if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { // If bits are being compared against that are and'd out, then the @@ -1445,7 +1446,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); - + // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : @@ -1460,16 +1461,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); } - + // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); } @@ -1517,11 +1518,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); Value *RHSCIOp; - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
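The two and-with-constant equality folds above reduce to simple range facts. A small self-checking sketch, assuming 32-bit integers (names are illustrative, not from the patch):

#include <cassert>
#include <cstdint>

static void checkAndFolds(uint32_t X) {
  // ((X & signbit) == 0)  <=>  X >=s 0    (the BOC->getValue().isSignBit() case)
  assert(((X & 0x80000000u) == 0) == ((int32_t)X >= 0));
  // ((X & ~7) == 0)       <=>  X <u 8     (the isHighOnes(BOC) case)
  assert(((X & ~7u) == 0) == (X < 8u));
}

int main() {
  const uint32_t samples[] = {0u, 1u, 7u, 8u, 9u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : samples)
    checkAndFolds(X);
  return 0;
}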
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && TD->getPointerSizeInBits() == @@ -1539,7 +1540,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (RHSOp) return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); } - + // The code below only handles extension cast instructions, so far. // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && @@ -1552,9 +1553,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) + if (RHSCIOp->getType() != LHSCIOp->getType()) return 0; - + // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) @@ -1599,7 +1600,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } - // The re-extended constant changed so the constant cannot be represented + // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. // All the cases that fold to true or false will have already been handled // by SimplifyICmpInst, so only deal with the tricky case. @@ -1637,26 +1638,26 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // llvm.sadd.with.overflow. To do this, we have to replace the original add // with a narrower add, and discard the add-with-constant that is part of the // range check (if we can't eliminate it, this isn't profitable). - + // In order to eliminate the add-with-constant, the compare can be its only // use. Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); if (!AddWithCst->hasOneUse()) return 0; - + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. if (!CI2->getValue().isPowerOf2()) return 0; unsigned NewWidth = CI2->getValue().countTrailingZeros(); if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; - + // The width of the new add formed is 1 more than the bias. ++NewWidth; - + // Check to see that CI1 is an all-ones value with NewWidth bits. if (CI1->getBitWidth() == NewWidth || CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) return 0; - - // In order to replace the original add with a narrower + + // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant // and truncates that discard the high bits of the add. Verify that this is // the case. @@ -1664,7 +1665,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); UI != E; ++UI) { if (*UI == AddWithCst) continue; - + // Only accept truncates for now. We would really like a nice recursive // predicate like SimplifyDemandedBits, but which goes downwards the use-def // chain to see which bits of a value are actually demanded. If the @@ -1674,32 +1675,32 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, if (TI == 0 || TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; } - + // If the pattern matches, truncate the inputs to the narrower type and // use the sadd_with_overflow intrinsic to efficiently compute both the // result and the overflow bit. 
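For reference, the source-level idiom that ProcessUGT_ADDCST_ADD is matching, written out in C++ (an illustration with hypothetical names, not code from the patch): an i8 addition done in a wider type plus an explicit unsigned range check, which the fold replaces with the overflow bit of a narrow add.

#include <cassert>
#include <cstdint>

// Before: the overflow-safe i8 add; ">u" in the comments is an unsigned compare.
//   icmp ugt (add (add %a, %b), 128), 255
static bool i8AddOverflows(int32_t a, int32_t b) {  // a and b hold sign-extended i8 values
  int32_t sum = a + b;
  return (uint32_t)(sum + 128) > 255u;              // true iff sum is outside [-128, 127]
}

// After the fold the same predicate comes from a narrow add-with-overflow,
// conceptually @llvm.sadd.with.overflow.i8; here approximated with the GCC/Clang builtin.
static bool i8AddOverflowsNarrow(int32_t a, int32_t b) {
  int8_t result;
  return __builtin_add_overflow((int8_t)a, (int8_t)b, &result);
}

int main() {
  assert(i8AddOverflows(100, 100) && i8AddOverflowsNarrow(100, 100));
  assert(!i8AddOverflows(-100, 50) && !i8AddOverflowsNarrow(-100, 50));
  return 0;
}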
Module *M = I.getParent()->getParent()->getParent(); - + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, NewType); InstCombiner::BuilderTy *Builder = IC.Builder; - + // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. Builder->SetInsertPoint(OrigAdd); - + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); - + // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. IC.ReplaceInstUsesWith(*OrigAdd, ZExt); - + // The original icmp gets replaced with the overflow value. return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } @@ -1709,13 +1710,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, // Don't bother doing this transformation for pointers, don't do it for // vectors. if (!isa<IntegerType>(OrigAddV->getType())) return 0; - + // If the add is a constant expr, then we don't bother transforming it. Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); if (OrigAdd == 0) return 0; - + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); - + // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. InstCombiner::BuilderTy *Builder = IC.Builder; @@ -1740,13 +1741,13 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) return APInt::getSignBit(BitWidth); - + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); const APInt &RHS = CI->getValue(); - + switch (I.getPredicate()) { - // For a UGT comparison, we don't care about any bits that + // For a UGT comparison, we don't care about any bits that // correspond to the trailing ones of the comparand. The value of these // bits doesn't impact the outcome of the comparison, because any value // greater than the RHS must differ in a bit higher than these due to carry. @@ -1755,7 +1756,7 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); return ~lowBitsSet; } - + // Similarly, for a ULT comparison, we don't care about the trailing zeros. // Any value less than the RHS must differ in a higher bit because of carries. case ICmpInst::ICMP_ULT: { @@ -1763,17 +1764,17 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); return ~lowBitsSet; } - + default: return APInt::getAllOnesValue(BitWidth); } - + } Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
@@ -1782,11 +1783,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { std::swap(Op0, Op1); Changed = true; } - + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); + + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty->isIntegerTy(1)) { @@ -1835,13 +1836,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BitWidth = Ty->getScalarSizeInBits(); else if (TD) // Pointers require TD info to get their size. BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - + bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { Value *A = 0, *B = 0; - + // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an // addition in wider type, and explicitly checks for overflow using @@ -1849,9 +1850,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // sadd_with_overflow intrinsic. // // TODO: This could probably be generalized to handle other overflow-safe - // operations if we worked out the formulas to compute the appropriate + // operations if we worked out the formulas to compute the appropriate // magic constants. - // + // // sum = a + b // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 { @@ -1861,14 +1862,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) return Res; } - + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); } - + // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on // them being folded in the code below. The SimplifyICmpInst code has @@ -1892,7 +1893,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } - + // If this comparison is a normal comparison, it demands all // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; @@ -1948,7 +1949,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1960,7 +1961,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". Value *X = 0; @@ -1969,7 +1970,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_NE, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) == 0" into "x != 3". 
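The two bit-test rewrites just described are easy to sanity-check exhaustively over the shift amount. A short sketch assuming 32-bit unsigned values (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 32; ++x) {
    // ((1 << x) & 8) == 0   <=>  x != 3   (3 = countTrailingZeros(8))
    assert((((1u << x) & 8u) == 0) == (x != 3));
    // ((8 >>u x) & 1) == 0  <=>  x != 3
    assert((((8u >> x) & 1u) == 0) == (x != 3));
  }
  return 0;
}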
const APInt *CI; @@ -1979,13 +1980,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { ConstantInt::get(X->getType(), CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); - + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1997,7 +1998,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". Value *X = 0; @@ -2006,7 +2007,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_EQ, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) != 0" into "x == 3". const APInt *CI; @@ -2016,7 +2017,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { ConstantInt::get(X->getType(), CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_ULT: @@ -2137,9 +2138,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) return Res; @@ -2194,7 +2195,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -2227,8 +2228,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (Op0->getType()->isPointerTy() && - (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + if (Op0->getType()->isPointerTy() && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. Op0 = CI->getOperand(0); @@ -2250,7 +2251,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), Op0, Op1); } } - + if (isa<CastInst>(Op0)) { // Handle the special case of: icmp (cast bool to X), <cst> // This comes up when you have code like @@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } - + if (CI->isMaxValue(true)) { ICmpInst::Predicate Pred = I.isSigned() ? 
I.getUnsignedPredicate() @@ -2404,7 +2405,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Mask = -1 >> count-trailing-zeros(Cst). if (!CI->isZero() && !CI->isOne()) { const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), + ConstantInt *Mask = ConstantInt::get(I.getContext(), APInt::getLowBitsSet(AP.getBitWidth(), AP.getBitWidth() - AP.countTrailingZeros())); @@ -2438,7 +2439,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } } - + { Value *A, *B; // ~x < ~y --> y < x // ~x < cst --> ~cst < x @@ -2452,11 +2453,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // (a+b) <u a --> llvm.uadd.with.overflow. // (a+b) <u b --> llvm.uadd.with.overflow. if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op0, m_Add(m_Value(A), m_Value(B))) && (Op1 == A || Op1 == B)) if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) return R; - + // a >u (a+b) --> llvm.uadd.with.overflow. // b >u (a+b) --> llvm.uadd.with.overflow. if (I.getPredicate() == ICmpInst::ICMP_UGT && @@ -2465,7 +2466,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) return R; } - + if (I.isEquality()) { Value *A, *B, *C, *D; @@ -2483,10 +2484,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { Constant *NC = ConstantInt::get(I.getContext(), C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC, "tmp"); + Value *Xor = Builder->CreateXor(C, NC); return new ICmpInst(I.getPredicate(), A, Xor); } - + // A^B == A^D -> B == D if (A == C) return new ICmpInst(I.getPredicate(), B, D); if (A == D) return new ICmpInst(I.getPredicate(), B, C); @@ -2494,7 +2495,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (B == D) return new ICmpInst(I.getPredicate(), A, C); } } - + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 @@ -2504,10 +2505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; - + if (A == C) { X = B; Y = D; Z = A; } else if (A == D) { @@ -2517,16 +2518,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } else if (B == D) { X = A; Y = C; Z = B; } - + if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y, "tmp"); - Op1 = Builder->CreateAnd(Op1, Z, "tmp"); + Op1 = Builder->CreateXor(X, Y); + Op1 = Builder->CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; } } - + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; @@ -2539,21 +2540,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // when it exposes other optimizations. 
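The (X&Z) == (Y&Z) rewrite above rests on the identity that two values agree under a mask exactly when their xor has no bits inside the mask. A self-checking sketch with illustrative values (not from the patch):

#include <cassert>
#include <cstdint>

static void checkMaskedEq(uint32_t X, uint32_t Y, uint32_t Z) {
  // (X & Z) == (Y & Z)  <=>  ((X ^ Y) & Z) == 0
  assert(((X & Z) == (Y & Z)) == (((X ^ Y) & Z) == 0));
}

int main() {
  const uint32_t vals[] = {0u, 1u, 0x0Fu, 0xF0u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : vals)
    for (uint32_t Y : vals)
      for (uint32_t Z : vals)
        checkMaskedEq(X, Y, Z);
  return 0;
}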
!A->hasOneUse()) { unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); - + if (ShAmt < ASize) { APInt MaskV = APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); MaskV <<= ShAmt; - + APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); } } } - + { Value *X; ConstantInt *Cst; // icmp X+Cst, X @@ -2579,31 +2580,31 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Constant *RHSC) { if (!isa<ConstantFP>(RHSC)) return 0; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); - + // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); if (MantissaWidth == -1) return 0; // Unknown. - + // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - + // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa<UIToFPInst>(LHSI); if (LHSUnsigned) ++InputSize; - + // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) return 0; - + // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is // not a NAN (it would have been previously simplified). assert(!RHS.isNaN() && "NaN comparison not already folded!"); - + ICmpInst::Predicate Pred; switch (I.getPredicate()) { default: llvm_unreachable("Unexpected predicate!"); @@ -2636,15 +2637,15 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case FCmpInst::FCMP_UNO: return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } - - const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); - + + IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + // Now we know that the APFloat is a normal number, zero or inf. - + // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. unsigned IntWidth = IntTy->getScalarSizeInBits(); - + if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. This handles +INF // and large values. @@ -2670,7 +2671,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } } - + if (!LHSUnsigned) { // See if the RHS value is < SignedMin. APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); @@ -2766,7 +2767,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
@@ -2776,7 +2777,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); @@ -2792,7 +2793,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { I.setPredicate(FCmpInst::FCMP_UNO); I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; - + case FCmpInst::FCMP_ORD: // True if ordered (no nans) case FCmpInst::FCMP_OEQ: // True if ordered and equal case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal @@ -2803,7 +2804,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return &I; } } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) @@ -2836,10 +2837,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { APFloat F = RHSF->getValueAPF(); F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); - // Avoid lossy conversions and denormals. + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); if (!Lossy && - F.compare(APFloat::getSmallestNormalized(*Sem)) != - APFloat::cmpLessThan) + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), ConstantFP::get(RHSC->getContext(), F)); break; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index f499290..7446a51 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -26,7 +26,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. 
if (TD) { - const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -38,7 +38,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - const Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); @@ -58,8 +58,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { Idx[0] = NullIdx; Idx[1] = NullIdx; Instruction *GEP = - GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, - New->getName()+".sub"); + GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original @@ -92,28 +91,28 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - const PointerType *DestTy = cast<PointerType>(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + PointerType *DestTy = cast<PointerType>(CI->getType()); + Type *DestPTy = DestTy->getElementType(); + if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } @@ -133,6 +132,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -163,8 +163,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; + // None of the following transforms are legal for volatile/atomic loads. + // FIXME: Some of it is okay for atomic loads; needs refactoring. 
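A rough model of why the guard added just below switches from isVolatile() to !isSimple(): once atomic loads exist in the IR, a load can be non-volatile yet still atomic, and most of these transforms are unsafe for it. This is a conceptual sketch of how the flags appear to relate, not the real LoadInst class:

#include <cassert>

enum Ordering { NotAtomic, Unordered, Monotonic, Acquire };  // mirrors the IR ordering lattice

struct LoadFlags {
  bool IsVolatile;
  Ordering Ord;
  bool isAtomic()    const { return Ord != NotAtomic; }
  bool isUnordered() const { return Ord <= Unordered && !IsVolatile; }
  bool isSimple()    const { return !isAtomic() && !IsVolatile; }
};

int main() {
  LoadFlags plainLoad{false, NotAtomic};     // the old "!isVolatile()" test allowed this...
  LoadFlags atomicLoad{false, Monotonic};    // ...but also this, which isSimple() now rejects
  assert(plainLoad.isSimple() && plainLoad.isUnordered());
  assert(!atomicLoad.isSimple() && !atomicLoad.isUnordered());
  return 0;
}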
+ if (!LI.isSimple()) return 0; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, @@ -256,11 +257,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); - const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; @@ -280,12 +281,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { NewGEPIndices.push_back(Zero); while (1) { - if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ break; NewGEPIndices.push_back(Zero); SrcPTy = STy->getElementType(0); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { NewGEPIndices.push_back(Zero); SrcPTy = ATy->getElementType(); } else { @@ -314,8 +315,8 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Value *NewCast; Value *SIOp0 = SI.getOperand(0); Instruction::CastOps opcode = Instruction::BitCast; - const Type* CastSrcTy = SIOp0->getType(); - const Type* CastDstTy = SrcPTy; + Type* CastSrcTy = SIOp0->getType(); + Type* CastDstTy = SrcPTy; if (CastDstTy->isPointerTy()) { if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; @@ -327,8 +328,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()); + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); @@ -370,21 +370,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); - // If the RHS is an alloca with a single use, zapify the store, making the - // alloca dead. - if (!SI.isVolatile()) { - if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) - return EraseInstFromFunction(SI); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { - if (isa<AllocaInst>(GEP->getOperand(0))) { - if (GEP->getOperand(0)->hasOneUse()) - return EraseInstFromFunction(SI); - } - } - } - } - // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = @@ -400,6 +385,23 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { SI.setAlignment(EffectiveStoreAlign); } + // Don't hack volatile/atomic stores. + // FIXME: Some bits are legal for atomic stores; needs refactoring. + if (!SI.isSimple()) return 0; + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. 
+ if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + } + } + } + // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -417,8 +419,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { + if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); @@ -432,7 +434,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) + LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it @@ -444,9 +446,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { @@ -549,11 +548,11 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; --BBI; } - // If this isn't a store, isn't a store to the same location, or if the - // alignments differ, bail out. + // If this isn't a store, isn't a store to the same location, or is not the + // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -569,7 +568,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; break; } @@ -601,10 +600,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Advance to a place where it is safe to insert the new store and // insert it. 
- BBI = DestBB->getFirstNonPHI(); + BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile(), - SI.getAlignment()); + SI.isVolatile(), + SI.getAlignment(), + SI.getOrdering(), + SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 630a6fe..7f48125 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -38,7 +38,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { m_Value(B))) && // The "1" can be any value known to be a power of 2. isPowerOfTwo(PowerOf2, IC.getTargetData())) { - A = IC.Builder->CreateSub(A, B, "tmp"); + A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } @@ -131,7 +131,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { - Value *Add = Builder->CreateMul(X, CI, "tmp"); + Value *Add = Builder->CreateMul(X, CI); return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); } } @@ -244,7 +244,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (BoolCast) { Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), - BoolCast, "tmp"); + BoolCast); return BinaryOperator::CreateAnd(V, OtherOp); } } @@ -421,7 +421,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { /// dyn_castZExtVal - Checks if V is a zext or constant that can /// be truncated to Ty without losing bits. -static Value *dyn_castZExtVal(Value *V, const Type *Ty) { +static Value *dyn_castZExtVal(Value *V, Type *Ty) { if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { if (Z->getSrcTy() == Ty) return Z->getOperand(0); @@ -466,8 +466,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { if (*CI != 1) - N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()), - "tmp"); + N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); if (I.isExact()) return BinaryOperator::CreateExactLShr(Op0, N); return BinaryOperator::CreateLShr(Op0, N); @@ -630,7 +629,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (match(Op1, m_Shl(m_Power2(), m_Value()))) { Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(Op1, N1, "tmp"); + Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 3777340..664546c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -28,8 +28,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); + Type *LHSType = LHSVal->getType(); + Type *RHSType = RHSVal->getType(); bool isNUW = false, isNSW = false, isExact = false; if (OverflowingBinaryOperator *BO = @@ -229,8 +229,7 @@ Instruction 
*InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { Value *Base = FixedOperands[0]; GetElementPtrInst *NewGEP = - GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); return NewGEP; @@ -287,7 +286,12 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); - + + // FIXME: This is overconservative; this transform is allowed in some cases + // for atomic operations. + if (FirstLI->isAtomic()) + return 0; + // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently // don't sink loads when some have their alignment specified and some don't. @@ -397,7 +401,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; + Type *CastSrcTy = 0; bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { @@ -572,7 +576,7 @@ struct LoweredPHIRecord { unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. @@ -701,7 +705,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); + Type *Ty = PHIUsers[UserI].Inst->getType(); PHINode *EltPHI; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5733c20..91e60a4 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -323,9 +324,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } // All operands were constants, fold it. - if (ConstOps.size() == I->getNumOperands()) + if (ConstOps.size() == I->getNumOperands()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps.data(), ConstOps.size(), TD); + ConstOps, TD); + } } return 0; @@ -363,7 +369,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: { // These transformations only work for selects over integers. - const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); if (!SelectTy) break; @@ -443,7 +449,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // FIXME: Type and constness constraints could be lifted, but we have to // watch code size carefully. 
We should consider xor instead of // sub/add when we decide to do that. - if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { ConstantInt *C1 = NULL, *C2 = NULL; @@ -476,10 +482,16 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + return ReplaceInstUsesWith(SI, TrueVal); } // NOTE: if we wanted to, this is where to detect integer MIN/MAX diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 811f949..6d85add 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -207,11 +208,12 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, return I; case Instruction::Shl: { - unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. - ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); - + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. @@ -219,7 +221,9 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, if (NewShAmt >= TypeWidth) return Constant::getNullValue(I->getType()); - I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); return I; } @@ -227,11 +231,11 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // zeros. if (CI->getValue() == NumBits) { APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); - V = IC.Builder->CreateAnd(I->getOperand(0), - ConstantInt::get(I->getContext(), Mask)); + V = IC.Builder->CreateAnd(BO->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); if (Instruction *VI = dyn_cast<Instruction>(V)) { - VI->moveBefore(I); - VI->takeName(I); + VI->moveBefore(BO); + VI->takeName(BO); } return V; } @@ -239,23 +243,27 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that // the and won't be needed. 
assert(CI->getZExtValue() > NumBits); - I->setOperand(1, ConstantInt::get(I->getType(), - CI->getZExtValue() - NumBits)); - return I; + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return BO; } case Instruction::LShr: { - unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. - ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. unsigned NewShAmt = NumBits+CI->getZExtValue(); if (NewShAmt >= TypeWidth) - return Constant::getNullValue(I->getType()); + return Constant::getNullValue(BO->getType()); - I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setIsExact(false); return I; } @@ -264,7 +272,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, if (CI->getValue() == NumBits) { APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); V = IC.Builder->CreateAnd(I->getOperand(0), - ConstantInt::get(I->getContext(), Mask)); + ConstantInt::get(BO->getContext(), Mask)); if (Instruction *VI = dyn_cast<Instruction>(V)) { VI->moveBefore(I); VI->takeName(I); @@ -275,9 +283,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); - I->setOperand(1, ConstantInt::get(I->getType(), - CI->getZExtValue() - NumBits)); - return I; + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setIsExact(false); + return BO; } case Instruction::Select: @@ -528,7 +537,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. 
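The AmtSum computation relies on the usual composition rule for same-direction shifts, while the flag clearing earlier in GetShiftedValue reflects that nuw/nsw/exact described the original, narrower shift and may not hold for the rewritten one. A quick self-check of the composition rule, assuming 32-bit unsigned values and in-range shift amounts (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < (1u << 16); ++X) {
    assert(((X << 2) << 3) == (X << 5));   // shl(c1) + shl(c2) -> shl(c1 + c2)
    assert(((X >> 2) >> 3) == (X >> 5));   // lshr(c1) + lshr(c2) -> lshr(c1 + c2)
  }
  return 0;
}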
- const IntegerType *Ty = cast<IntegerType>(I.getType()); + IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8fea8eb..5cd9a4b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -103,7 +103,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(V != 0 && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); + Type *VTy = V->getType(); assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && @@ -325,8 +325,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { Constant *AndC = Constant::getIntegerValue(VTy, ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); return InsertNewInstWith(And, *I); } } @@ -351,14 +350,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); Constant *XorC = ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); + Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); return InsertNewInstWith(NewXor, *I); } @@ -404,8 +401,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? - if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { - if (const VectorType *SrcVTy = + if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. 
@@ -826,7 +823,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = 0; if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -855,7 +852,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (DemandedElts.isAllOnesValue()) return 0; - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Zero = Constant::getNullValue(EltTy); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -962,6 +959,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { UndefElts.setBit(i); + } else if (!DemandedElts[i]) { + NewUndefElts = true; + UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { if (UndefElts4[MaskVal]) { NewUndefElts = true; @@ -992,7 +992,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } case Instruction::BitCast: { // Vector->vector casts only. - const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index ad6a8d0..154267c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -77,7 +77,7 @@ static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - const VectorType *PTy = cast<VectorType>(V->getType()); + VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); @@ -175,7 +175,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) @@ -225,7 +225,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); } - const Type *Int32Ty = Type::getInt32Ty(EI.getContext()); + Type *Int32Ty = Type::getInt32Ty(EI.getContext()); return ExtractElementInst::Create(Src, ConstantInt::get(Int32Ty, SrcIdx, false)); @@ -555,7 +555,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // shuffle mask, do the replacement. 
if (isSplat || NewMask == LHSMask || NewMask == Mask) { std::vector<Constant*> Elts; - const Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] < 0) { Elts.push_back(UndefValue::get(Int32Ty)); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index ab98ef9..92874b9 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -46,8 +46,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm-c/Initialization.h" #include <algorithm> #include <climits> @@ -83,7 +85,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. -bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. @@ -107,6 +109,43 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { return true; } +// Return true, if No Signed Wrap should be maintained for I. +// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", +// where both B and C should be ConstantInts, results in a constant that does +// not overflow. This function only handles the Add and Sub opcodes. For +// all other opcodes, the function conservatively returns false. +static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoSignedWrap()) { + return false; + } + + // We reason about Add and Sub Only. + Instruction::BinaryOps Opcode = I.getOpcode(); + if (Opcode != Instruction::Add && + Opcode != Instruction::Sub) { + return false; + } + + ConstantInt *CB = dyn_cast<ConstantInt>(B); + ConstantInt *CC = dyn_cast<ConstantInt>(C); + + if (!CB || !CC) { + return false; + } + + const APInt &BVal = CB->getValue(); + const APInt &CVal = CC->getValue(); + bool Overflow = false; + + if (Opcode == Instruction::Add) { + BVal.sadd_ov(CVal, Overflow); + } else { + BVal.ssub_ov(CVal, Overflow); + } + + return !Overflow; +} /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: @@ -158,7 +197,16 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + if (MaintainNoSignedWrap(I, B, C) && + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. 
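The NSW bookkeeping above boils down to a signed-overflow check on the two folded constants. A minimal standalone sketch of that check, using the same APInt::sadd_ov call the helper relies on (the bit width and constant values here are made up for illustration and are not part of the patch):

  #include "llvm/ADT/APInt.h"
  #include <cstdio>

  // Mirrors the Add case of MaintainNoSignedWrap: the nsw flag may survive
  // reassociation only if "B + C" cannot overflow as a signed value.
  static bool addKeepsNSW(const llvm::APInt &B, const llvm::APInt &C) {
    bool Overflow = false;
    (void)B.sadd_ov(C, Overflow);      // signed add that reports overflow
    return !Overflow;
  }

  int main() {
    llvm::APInt B(8, 100), C(8, 27);   // 100 + 27 = 127 fits in i8 -> keep nsw
    llvm::APInt D(8, 100), E(8, 28);   // 100 + 28 = 128 overflows -> drop nsw
    std::printf("%d %d\n", addKeepsNSW(B, C), addKeepsNSW(D, E)); // prints: 1 0
    return 0;
  }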
+ I.clearSubclassOptionalData(); + I.setHasNoSignedWrap(true); + } else { + I.clearSubclassOptionalData(); + } + Changed = true; ++NumReassoc; continue; @@ -240,7 +288,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Constant *C2 = cast<Constant>(Op1->getOperand(1)); Constant *Folded = ConstantExpr::get(Opcode, C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, A, B); + BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); InsertNewInstWith(New, I); New->takeName(Op1); I.setOperand(0, New); @@ -248,6 +296,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. I.clearSubclassOptionalData(); + Changed = true; continue; } @@ -516,8 +565,8 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { - const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); - const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); // Verify that either both or neither are vectors. if ((SrcTy == NULL) != (DestTy == NULL)) return 0; @@ -654,7 +703,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } } else { CastInst *CI = cast<CastInst>(&I); - const Type *RetTy = CI->getType(); + Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) @@ -680,7 +729,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -688,7 +737,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, // Start with the index over the outer type. 
Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -711,7 +760,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -722,7 +771,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); @@ -737,12 +786,20 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, return Ty; } - +static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && + !Src.hasOneUse()) + return false; + return true; +} Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) + if (Value *V = SimplifyGEPInst(Ops, TD)) return ReplaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -751,13 +808,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { // Skip indices into struct types. - const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); if (!SeqTy) continue; // If the element type has zero size then any index over it is equivalent @@ -785,21 +842,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { - - // If this GEP has only 0 indices, it is the same pointer as - // Src. If Src is not a trivial GEP too, don't combine - // the indices. - if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() && - !Src->hasOneUse()) + if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) return 0; // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. 
- // - if (GetElementPtrInst *SrcGEP = - dyn_cast<GetElementPtrInst>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2) + if (GEPOperator *SrcGEP = + dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return 0; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -851,15 +902,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Indices.empty()) return (GEP.isInBounds() && Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()); + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, + GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); if (StrippedPtr != PtrOp && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -875,21 +925,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); - if (const ArrayType *CATy = + PointerType *CPTy = cast<PointerType>(PtrOp->getType()); + if (ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx.begin(), - Idx.end(), GEP.getName()); + GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); return Res; } - if (const ArrayType *XATy = + if (ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { @@ -907,8 +956,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = StrippedPtrTy->getElementType(); - const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); + Type *SrcElTy = StrippedPtrTy->getElementType(); + Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { @@ -916,8 +965,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = GEP.getOperand(1); Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -975,8 +1024,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = NewIdx; Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()): - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()): + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -1023,14 +1072,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - const Type *InTy = + Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); + Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -1045,15 +1092,43 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { -static bool IsOnlyNullComparedAndFreed(const Value &V) { - for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); +static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, + int Depth = 0) { + if (Depth == 8) + return false; + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { - const User *U = *UI; - if (isFreeCall(U)) + User *U = *UI; + if (isFreeCall(U)) { + Users.push_back(U); continue; - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { + Users.push_back(ICI); + continue; + } + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { + Users.push_back(BCI); + continue; + } + } + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { + Users.push_back(GEPI); + continue; + } + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + Users.push_back(II); continue; + } + } return false; } return true; @@ -1063,25 +1138,20 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
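At the source level, the pattern this transform removes looks roughly like the hypothetical snippet below (not taken from the patch): the allocation's only uses are an equality test against null and the matching free, so the comparison can be folded to a constant and both calls dropped.

  #include <cstdlib>

  int only_checked_and_freed(std::size_t n) {
    void *p = std::malloc(n);    // malloc-like call with no other uses
    int ok = (p != nullptr);     // icmp eq/ne against null -> folded to a constant
    std::free(p);                // free call -> erased together with the malloc
    return ok;                   // after the transform no allocation remains
  }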
- if (IsOnlyNullComparedAndFreed(MI)) { - for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end(); - UI != UE;) { - // We can assume that every remaining use is a free call or an icmp eq/ne - // to null, so the cast is safe. - Instruction *I = cast<Instruction>(*UI); - - // Early increment here, as we're about to get rid of the user. - ++UI; - - if (isFreeCall(I)) { - EraseInstFromFunction(*cast<CallInst>(I)); - continue; + SmallVector<WeakVH, 64> Users; + if (IsOnlyNullComparedAndFreed(&MI, Users)) { + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *I = cast_or_null<Instruction>(&*Users[i]); + if (!I) continue; + + if (ICmpInst *C = dyn_cast<ICmpInst>(I)) { + ReplaceInstUsesWith(*C, + ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); } - // Again, the cast is safe. - ICmpInst *C = cast<ICmpInst>(I); - ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), - C->isFalseWhenEqual())); - EraseInstFromFunction(*C); + EraseInstFromFunction(*I); } return EraseInstFromFunction(MI); } @@ -1120,8 +1190,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { !isa<Constant>(X)) { // Swap Destinations and condition... BI.setCondition(X); - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); return &BI; } @@ -1136,8 +1205,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -1153,8 +1221,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -1168,11 +1235,17 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) - SI.setOperand(i, - ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), - AddRHS)); - SI.setOperand(0, I->getOperand(0)); + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. 
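A hypothetical source-level illustration of the case-value rewrite described in the comment above (at the IR level the rewrite is exact, since the removed add and the subtraction applied to each case constant use the same wrapping arithmetic):

  // Before: the switch operates on x + 4.
  int classify_before(int x) {
    switch (x + 4) {
    case 1:  return 10;      // selected when x == -3
    default: return 0;
    }
  }

  // After: the add is gone and every case value has AddRHS subtracted from it.
  int classify_after(int x) {
    switch (x) {
    case -3: return 10;      // the same inputs select the same case
    default: return 0;
    }
  }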
+ for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), + AddRHS); + assert(isa<ConstantInt>(NewCaseVal) && + "Result of expression should be constant"); + SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal)); + } + SI.setCondition(I->getOperand(0)); Worklist.Add(I); return &SI; } @@ -1242,7 +1315,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - ArrayRef<unsigned>(insi, inse)); + makeArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -1254,7 +1327,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - ArrayRef<unsigned>(exti, exte)); + makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which @@ -1310,7 +1383,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // load from a GEP. This reduces the size of the load. // FIXME: If a load is used only by extractvalue instructions then this // could be done regardless of having multiple uses. - if (!L->isVolatile() && L->hasOneUse()) { + if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector<Value*, 4> Indices; // Prefix an i32 0 since we need the first element. @@ -1322,8 +1395,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder->SetInsertPoint(L->getParent(), L); - Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), - Indices.begin(), Indices.end()); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use ReplaceInstUsesWith(). return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -1339,6 +1411,342 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return 0; } +enum Personality_Type { + Unknown_Personality, + GNU_Ada_Personality, + GNU_CXX_Personality +}; + +/// RecognizePersonality - See if the given exception handling personality +/// function is one that we understand. If so, return a description of it; +/// otherwise return Unknown_Personality. +static Personality_Type RecognizePersonality(Value *Pers) { + Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return Unknown_Personality; + return StringSwitch<Personality_Type>(F->getName()) + .Case("__gnat_eh_personality", GNU_Ada_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Default(Unknown_Personality); +} + +/// isCatchAll - Return 'true' if the given typeinfo will match anything. +static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { + switch (Personality) { + case Unknown_Personality: + return false; + case GNU_Ada_Personality: + // While __gnat_all_others_value will match any Ada exception, it doesn't + // match foreign exceptions (or didn't, before gcc-4.7). 
+ return false; + case GNU_CXX_Personality: + return TypeInfo->isNullValue(); + } + llvm_unreachable("Unknown personality!"); +} + +static bool shorter_filter(const Value *LHS, const Value *RHS) { + return + cast<ArrayType>(LHS->getType())->getNumElements() + < + cast<ArrayType>(RHS->getType())->getNumElements(); +} + +Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { + // The logic here should be correct for any real-world personality function. + // However if that turns out not to be true, the offending logic can always + // be conditioned on the personality function, like the catch-all logic is. + Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn()); + + // Simplify the list of clauses, eg by removing repeated catch clauses + // (these are often created by inlining). + bool MakeNewInstruction = false; // If true, recreate using the following: + SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + + SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. + for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { + bool isLastClause = i + 1 == e; + if (LI.isCatch(i)) { + // A catch clause. + Value *CatchClause = LI.getClause(i); + Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + + // If we already saw this clause, there is no point in having a second + // copy of it. + if (AlreadyCaught.insert(TypeInfo)) { + // This catch clause was not already seen. + NewClauses.push_back(CatchClause); + } else { + // Repeated catch clause - drop the redundant copy. + MakeNewInstruction = true; + } + + // If this is a catch-all then there is no point in keeping any following + // clauses or marking the landingpad as having a cleanup. + if (isCatchAll(Personality, TypeInfo)) { + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + } else { + // A filter clause. If any of the filter elements were already caught + // then they can be dropped from the filter. It is tempting to try to + // exploit the filter further by saying that any typeinfo that does not + // occur in the filter can't be caught later (and thus can be dropped). + // However this would be wrong, since typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some + // class derived from it). + assert(LI.isFilter(i) && "Unsupported landingpad clause!"); + Value *FilterClause = LI.getClause(i); + ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); + unsigned NumTypeInfos = FilterType->getNumElements(); + + // An empty filter catches everything, so there is no point in keeping any + // following clauses or marking the landingpad as having a cleanup. By + // dealing with this case here the following code is made a bit simpler. + if (!NumTypeInfos) { + NewClauses.push_back(FilterClause); + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + + bool MakeNewFilter = false; // If true, make a new filter. + SmallVector<Constant *, 16> NewFilterElts; // New elements. + if (isa<ConstantAggregateZero>(FilterClause)) { + // Not an empty filter - it contains at least one null typeinfo. + assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); + Constant *TypeInfo = + Constant::getNullValue(FilterType->getElementType()); + // If this typeinfo is a catch-all then the filter can never match. 
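For the C++ personality, the catch-all recognized here is the null typeinfo clause that a source-level catch(...) produces; a hypothetical example (not from the patch):

  // With __gxx_personality_v0, the handler below is lowered to a landingpad
  // clause of the form "catch i8* null", which isCatchAll() treats as
  // matching every exception.
  int guarded(int (*f)()) {
    try {
      return f();
    } catch (...) {
      return -1;
    }
  }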
+ if (isCatchAll(Personality, TypeInfo)) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // There is no point in having multiple copies of this typeinfo, so + // discard all but the first copy if there is more than one. + NewFilterElts.push_back(TypeInfo); + if (NumTypeInfos > 1) + MakeNewFilter = true; + } else { + ConstantArray *Filter = cast<ConstantArray>(FilterClause); + SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. + NewFilterElts.reserve(NumTypeInfos); + + // Remove any filter elements that were already caught or that already + // occurred in the filter. While there, see if any of the elements are + // catch-alls. If so, the filter can be discarded. + bool SawCatchAll = false; + for (unsigned j = 0; j != NumTypeInfos; ++j) { + Value *Elt = Filter->getOperand(j); + Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + if (isCatchAll(Personality, TypeInfo)) { + // This element is a catch-all. Bail out, noting this fact. + SawCatchAll = true; + break; + } + if (AlreadyCaught.count(TypeInfo)) + // Already caught by an earlier clause, so having it in the filter + // is pointless. + continue; + // There is no point in having multiple copies of the same typeinfo in + // a filter, so only add it if we didn't already. + if (SeenInFilter.insert(TypeInfo)) + NewFilterElts.push_back(cast<Constant>(Elt)); + } + // A filter containing a catch-all cannot match anything by definition. + if (SawCatchAll) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // If we dropped something from the filter, make a new one. + if (NewFilterElts.size() < NumTypeInfos) + MakeNewFilter = true; + } + if (MakeNewFilter) { + FilterType = ArrayType::get(FilterType->getElementType(), + NewFilterElts.size()); + FilterClause = ConstantArray::get(FilterType, NewFilterElts); + MakeNewInstruction = true; + } + + NewClauses.push_back(FilterClause); + + // If the new filter is empty then it will catch everything so there is + // no point in keeping any following clauses or marking the landingpad + // as having a cleanup. The case of the original filter being empty was + // already handled above. + if (MakeNewFilter && !NewFilterElts.size()) { + assert(MakeNewInstruction && "New filter but not a new instruction!"); + CleanupFlag = false; + break; + } + } + } + + // If several filters occur in a row then reorder them so that the shortest + // filters come first (those with the smallest number of elements). This is + // advantageous because shorter filters are more likely to match, speeding up + // unwinding, but mostly because it increases the effectiveness of the other + // filter optimizations below. + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + unsigned j; + // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. + for (j = i; j != e; ++j) + if (!isa<ArrayType>(NewClauses[j]->getType())) + break; + + // Check whether the filters are already sorted by length. We need to know + // if sorting them is actually going to do anything so that we only make a + // new landingpad instruction if it does. + for (unsigned k = i; k + 1 < j; ++k) + if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + // Not sorted, so sort the filters now. Doing an unstable sort would be + // correct too but reordering filters pointlessly might confuse users. + std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, + shorter_filter); + MakeNewInstruction = true; + break; + } + + // Look for the next batch of filters. 
+ i = j + 1; + } + + // If typeinfos matched if and only if equal, then the elements of a filter L + // that occurs later than a filter F could be replaced by the intersection of + // the elements of F and L. In reality two typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some class + // derived from it) so it would be wrong to perform this transform in general. + // However the transform is correct and useful if F is a subset of L. In that + // case L can be replaced by F, and thus removed altogether since repeating a + // filter is pointless. So here we look at all pairs of filters F and L where + // L follows F in the list of clauses, and remove L if every element of F is + // an element of L. This can occur when inlining C++ functions with exception + // specifications. + for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { + // Examine each filter in turn. + Value *Filter = NewClauses[i]; + ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType()); + if (!FTy) + // Not a filter - skip it. + continue; + unsigned FElts = FTy->getNumElements(); + // Examine each filter following this one. Doing this backwards means that + // we don't have to worry about filters disappearing under us when removed. + for (unsigned j = NewClauses.size() - 1; j != i; --j) { + Value *LFilter = NewClauses[j]; + ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType()); + if (!LTy) + // Not a filter - skip it. + continue; + // If Filter is a subset of LFilter, i.e. every element of Filter is also + // an element of LFilter, then discard LFilter. + SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j; + // If Filter is empty then it is a subset of LFilter. + if (!FElts) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + // Move on to the next filter. + continue; + } + unsigned LElts = LTy->getNumElements(); + // If Filter is longer than LFilter then it cannot be a subset of it. + if (FElts > LElts) + // Move on to the next filter. + continue; + // At this point we know that LFilter has at least one element. + if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros. + // Filter is a subset of LFilter iff Filter contains only zeros (as we + // already know that Filter is not longer than LFilter). + if (isa<ConstantAggregateZero>(Filter)) { + assert(FElts <= LElts && "Should have handled this case earlier!"); + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + continue; + } + ConstantArray *LArray = cast<ConstantArray>(LFilter); + if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros. + // Since Filter is non-empty and contains only zeros, it is a subset of + // LFilter iff LFilter contains a zero. + assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); + for (unsigned l = 0; l != LElts; ++l) + if (LArray->getOperand(l)->isNullValue()) { + // LFilter contains a zero - discard it. + NewClauses.erase(J); + MakeNewInstruction = true; + break; + } + // Move on to the next filter. + continue; + } + // At this point we know that both filters are ConstantArrays. Loop over + // operands to see whether every element of Filter is also an element of + // LFilter. Since filters tend to be short this is probably faster than + // using a method that scales nicely. 
+ ConstantArray *FArray = cast<ConstantArray>(Filter); + bool AllFound = true; + for (unsigned f = 0; f != FElts; ++f) { + Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); + AllFound = false; + for (unsigned l = 0; l != LElts; ++l) { + Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); + if (LTypeInfo == FTypeInfo) { + AllFound = true; + break; + } + } + if (!AllFound) + break; + } + if (AllFound) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + } + } + + // If we changed any of the clauses, replace the old landingpad instruction + // with a new one. + if (MakeNewInstruction) { + LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), + LI.getPersonalityFn(), + NewClauses.size()); + for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) + NLI->addClause(NewClauses[i]); + // A landing pad with no clauses must have the cleanup flag set. It is + // theoretically possible, though highly unlikely, that we eliminated all + // clauses. If so, force the cleanup flag to true. + if (NewClauses.empty()) + CleanupFlag = true; + NLI->setCleanup(CleanupFlag); + return NLI; + } + + // Even if none of the clauses changed, we may nonetheless have understood + // that the cleanup flag is pointless. Clear it if so. + if (LI.isCleanup() != CleanupFlag) { + assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); + LI.setCleanup(CleanupFlag); + return &LI; + } + + return 0; +} + @@ -1350,7 +1758,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { assert(I->hasOneUse() && "Invariants didn't hold!"); // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) + if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() || + isa<TerminatorInst>(I)) return false; // Do not sink alloca instructions out of the entry block. @@ -1367,8 +1776,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { return false; } - BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(InsertPos); ++NumSunkInst; return true; @@ -1503,27 +1911,29 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (!Visited.count(BB)) { - Instruction *Term = BB->getTerminator(); - while (Term != BB->begin()) { // Remove instrs bottom-up - BasicBlock::iterator I = Term; --I; - - DEBUG(errs() << "IC: DCE: " << *I << '\n'); - // A debug intrinsic shouldn't force another iteration if we weren't - // going to do one without it. - if (!isa<DbgInfoIntrinsic>(I)) { - ++NumDeadInst; - MadeIRChange = true; - } - - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I->getType()->isVoidTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Visited.count(BB)) continue; + + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. 
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; } + if (!isa<DbgInfoIntrinsic>(Inst)) { + ++NumDeadInst; + MadeIRChange = true; + } + Inst->eraseFromParent(); } + } } while (!Worklist.isEmpty()) { @@ -1604,13 +2014,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Everything uses the new instruction now. I->replaceAllUsesWith(Result); + // Move the name to the new instruction first. + Result->takeName(I); + // Push the new instruction and any users onto the worklist. Worklist.Add(Result); Worklist.AddUsersToWorkList(*Result); - // Move the name to the new instruction first. - Result->takeName(I); - // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I; diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 1d31fcc..e8ef265 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -74,7 +74,7 @@ bool EdgeProfiler::runOnModule(Module &M) { } } - const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); + Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "EdgeProfCounters"); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 3f2c412..ccf7e11 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -60,11 +60,11 @@ namespace { bool runOnModule(Module &M); // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(DebugInfoFinder &DIF); + void emitGCNO(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. - bool emitProfileArcs(DebugInfoFinder &DIF); + bool emitProfileArcs(); // Get pointers to the functions in the runtime library. Constant *getStartFileFunc(); @@ -86,8 +86,7 @@ namespace { // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(DebugInfoFinder &, - SmallVector<std::pair<GlobalVariable *, + void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &); std::string mangleName(DICompileUnit CU, std::string NewStem); @@ -110,15 +109,6 @@ ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, return new GCOVProfiler(EmitNotes, EmitData, Use402Format); } -static DISubprogram findSubprogram(DIScope Scope) { - while (!Scope.isSubprogram()) { - assert(Scope.isLexicalBlock() && - "Debug location not lexical block or subprogram"); - Scope = DILexicalBlock(Scope).getContext(); - } - return DISubprogram(Scope); -} - namespace { class GCOVRecord { protected: @@ -177,18 +167,24 @@ namespace { } uint32_t length() { + // Here 2 = 1 for string lenght + 1 for '0' id#. 
return lengthOfGCOVString(Filename) + 2 + Lines.size(); } - private: - friend class GCOVBlock; + void writeOut() { + write(0); + writeGCOVString(Filename); + for (int i = 0, e = Lines.size(); i != e; ++i) + write(Lines[i]); + } - GCOVLines(std::string Filename, raw_ostream *os) - : Filename(Filename) { + GCOVLines(StringRef F, raw_ostream *os) + : Filename(F) { this->os = os; } - std::string Filename; + private: + StringRef Filename; SmallVector<uint32_t, 32> Lines; }; @@ -197,7 +193,7 @@ namespace { // other blocks. class GCOVBlock : public GCOVRecord { public: - GCOVLines &getFile(std::string Filename) { + GCOVLines &getFile(StringRef Filename) { GCOVLines *&Lines = LinesByFile[Filename]; if (!Lines) { Lines = new GCOVLines(Filename, os); @@ -220,13 +216,8 @@ namespace { write(Len); write(Number); for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), - E = LinesByFile.end(); I != E; ++I) { - write(0); - writeGCOVString(I->second->Filename); - for (int i = 0, e = I->second->Lines.size(); i != e; ++i) { - write(I->second->Lines[i]); - } - } + E = LinesByFile.end(); I != E; ++I) + I->second->writeOut(); write(0); write(0); } @@ -353,66 +344,66 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - DebugInfoFinder DIF; - DIF.processModule(M); - - if (EmitNotes) emitGCNO(DIF); - if (EmitData) return emitProfileArcs(DIF); + if (EmitNotes) emitGCNO(); + if (EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { +void GCOVProfiler::emitGCNO() { DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; - for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(), - E = DIF.compile_unit_end(); I != E; ++I) { - // Each compile unit gets its own .gcno file. This means that whether we run - // this pass over the original .o's as they're produced, or run it after - // LTO, we'll generate the same .gcno files. - - DICompileUnit CU(*I); - raw_fd_ostream *&out = GcnoFiles[CU]; - std::string ErrorInfo; - out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, - raw_fd_ostream::F_Binary); - if (!Use402Format) - out->write("oncg*404MVLL", 12); - else - out->write("oncg*402MVLL", 12); - } - - for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), - SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { - DISubprogram SP(*SPI); - raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; - - Function *F = SP.getFunction(); - if (!F) continue; - GCOVFunction Func(SP, os, Use402Format); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); - TerminatorInst *TI = BB->getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. 
+ + DICompileUnit CU(CU_Nodes->getOperand(i)); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out->write("oncg*404MVLL", 12); + else + out->write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + raw_fd_ostream *&os = GcnoFiles[CU]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os, Use402Format); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } } - } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - - uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { - const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; - if (Line == Loc.getLine()) continue; - Line = Loc.getLine(); - if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; - - GCOVLines &Lines = Block.getFile(SP.getFilename()); - Lines.addLine(Loc.getLine()); + Func.writeOut(); } } - Func.writeOut(); } for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator @@ -424,103 +415,107 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { } } -bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { - if (DIF.subprogram_begin() == DIF.subprogram_end()) - return false; - - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), - SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { - DISubprogram SP(*SPI); - Function *F = SP.getFunction(); - if (!F) continue; - - unsigned Edges = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - if (isa<ReturnInst>(TI)) - ++Edges; - else - Edges += TI->getNumSuccessors(); - } - - const ArrayType *CounterTy = +bool GCOVProfiler::emitProfileArcs() { + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) return false; + + bool Result = false; + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + DIArray SPs = CU.getSubprograms(); + SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + Function *F = SP.getFunction(); + if (!F) continue; + if (!Result) Result = true; + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + ArrayType *CounterTy = 
ArrayType::get(Type::getInt64Ty(*Ctx), Edges); - GlobalVariable *Counters = + GlobalVariable *Counters = new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr", 0, false, 0); - CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); - - UniqueVector<BasicBlock *> ComplexEdgePreds; - UniqueVector<BasicBlock *> ComplexEdgeSuccs; - - unsigned Edge = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); - if (Successors) { - IRBuilder<> Builder(TI); - - if (Successors == 1) { - Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, - Edge); - Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); - Builder.CreateStore(Count, Counter); - } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - Value *Sel = Builder.CreateSelect( + CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + + UniqueVector<BasicBlock *> ComplexEdgePreds; + UniqueVector<BasicBlock *> ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + Value *Sel = Builder.CreateSelect( BI->getCondition(), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); - SmallVector<Value *, 2> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); - Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters, - Idx.begin(), Idx.end()); - Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); - Builder.CreateStore(Count, Counter); - } else { - ComplexEdgePreds.insert(BB); - for (int i = 0; i != Successors; ++i) - ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + SmallVector<Value *, 2> Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; } - Edge += Successors; } - } - - if (!ComplexEdgePreds.empty()) { - GlobalVariable *EdgeTable = + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = buildEdgeLookupTable(F, Counters, ComplexEdgePreds, ComplexEdgeSuccs); - GlobalVariable *EdgeState = getEdgeStateValue(); - - const Type *Int32Ty = Type::getInt32Ty(*Ctx); - for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); - Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); - } - for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { - // call runtime to perform 
increment - IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI()); - Value *CounterPtrArray = + GlobalVariable *EdgeState = getEdgeStateValue(); + + Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + BasicBlock::iterator InsertPt = + ComplexEdgeSuccs[i+1]->getFirstInsertionPt(); + IRBuilder<> Builder(InsertPt); + Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); - Builder.CreateCall2(getIncrementIndirectCounterFunc(), - EdgeState, CounterPtrArray); - // clear the predecessor number - Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } } } + insertCounterWriteout(CountersBySP); } - - insertCounterWriteout(DIF, CountersBySP); - - return true; + return Result; } // All edges with successors that aren't branches are "complex", because it @@ -535,8 +530,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( // read it. Threads and invoke make this untrue. // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. - const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); - const ArrayType *EdgeTableTy = ArrayType::get( + Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + ArrayType *EdgeTableTy = ArrayType::get( Int64PtrTy, Succs.size() * Preds.size()); Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; @@ -572,7 +567,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } @@ -582,7 +577,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); } @@ -592,7 +587,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } @@ -602,13 +597,13 @@ Constant *GCOVProfiler::getEmitArcsFunc() { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } Constant *GCOVProfiler::getEndFileFunc() { - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return 
M->getOrInsertFunction("llvm_gcda_end_file", FTy); } @@ -626,9 +621,8 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - DebugInfoFinder &DIF, SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { - const FunctionType *WriteoutFTy = + FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, @@ -642,29 +636,31 @@ void GCOVProfiler::insertCounterWriteout( Constant *EmitArcs = getEmitArcsFunc(); Constant *EndFile = getEndFileFunc(); - for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(), - CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) { - DICompileUnit compile_unit(*CUI); - std::string FilenameGcda = mangleName(compile_unit, "gcda"); - Builder.CreateCall(StartFile, - Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit compile_unit(CU_Nodes->getOperand(i)); + std::string FilenameGcda = mangleName(compile_unit, "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator I = CountersBySP.begin(), E = CountersBySP.end(); - I != E; ++I) { - DISubprogram SP(I->second); - intptr_t ident = reinterpret_cast<intptr_t>(I->second); - Builder.CreateCall2(EmitFunction, - ConstantInt::get(Type::getInt32Ty(*Ctx), ident), - Builder.CreateGlobalStringPtr(SP.getName())); - - GlobalVariable *GV = I->first; - unsigned Arcs = + I != E; ++I) { + DISubprogram SP(I->second); + intptr_t ident = reinterpret_cast<intptr_t>(I->second); + Builder.CreateCall2(EmitFunction, + ConstantInt::get(Type::getInt32Ty(*Ctx), ident), + Builder.CreateGlobalStringPtr(SP.getName())); + + GlobalVariable *GV = I->first; + unsigned Arcs = cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); - Builder.CreateCall2(EmitArcs, - ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), - Builder.CreateConstGEP2_64(GV, 0, 0)); + Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); } - Builder.CreateCall(EndFile); } Builder.CreateRetVoid(); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index e09f882..62c21b8 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -112,8 +112,8 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // be calculated from other edge counters on reading the profile info back // in. 
- const Type *Int32 = Type::getInt32Ty(M.getContext()); - const ArrayType *ATy = ArrayType::get(Int32, NumEdges); + Type *Int32 = Type::getInt32Ty(M.getContext()); + ArrayType *ATy = ArrayType::get(Int32, NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "OptEdgeProfCounters"); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp index 7541663..23915d3 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -374,7 +374,7 @@ namespace llvm { template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable, xcompile> { public: - static const StructType *get(LLVMContext& C) { + static StructType *get(LLVMContext& C) { return( StructType::get( TypeBuilder<types::i<32>, xcompile>::get(C), // type TypeBuilder<types::i<32>, xcompile>::get(C), // array size @@ -909,7 +909,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* pathNumber) { if(pathNumber == NULL || isa<ConstantInt>(pathNumber) || (((Instruction*)(pathNumber))->getParent()) != block) { - return(block->getFirstNonPHI()); + return(block->getFirstInsertionPt()); } else { Instruction* pathNumberInst = (Instruction*) (pathNumber); BasicBlock::iterator insertPoint; @@ -930,7 +930,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* // A PHINode is created in the node, and its values initialized to -1U. void PathProfiler::preparePHI(BLInstrumentationNode* node) { BasicBlock* block = node->getBlock(); - BasicBlock::iterator insertPoint = block->getFirstNonPHI(); + BasicBlock::iterator insertPoint = block->getFirstInsertionPt(); pred_iterator PB = pred_begin(node->getBlock()), PE = pred_end(node->getBlock()); PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), @@ -999,7 +999,7 @@ void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node, BasicBlock::iterator insertPoint; if( atBeginning ) - insertPoint = block->getFirstNonPHI(); + insertPoint = block->getFirstInsertionPt(); else insertPoint = block->getTerminator(); @@ -1029,8 +1029,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue, gepIndices[1] = incValue; GetElementPtrInst* pcPointer = - GetElementPtrInst::Create(dag->getCounterArray(), - gepIndices.begin(), gepIndices.end(), + GetElementPtrInst::Create(dag->getCounterArray(), gepIndices, "counterInc", insertPoint); // Load from the array - call it oldPC @@ -1140,7 +1139,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, } BasicBlock::iterator insertPoint = atBeginning ? - instrumentNode->getBlock()->getFirstNonPHI() : + instrumentNode->getBlock()->getFirstInsertionPt() : instrumentNode->getBlock()->getTerminator(); // add information from the bottom edge, if it exists @@ -1172,7 +1171,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, // Insert instrumentation if this is a normal edge else { BasicBlock::iterator insertPoint = atBeginning ? 
- instrumentNode->getBlock()->getFirstNonPHI() : + instrumentNode->getBlock()->getFirstInsertionPt() : instrumentNode->getBlock()->getTerminator(); if( edge->isInitialization() ) { // initialize path number @@ -1233,7 +1232,7 @@ void PathProfiler::insertInstrumentation( end = callEdges.end(); edge != end; edge++ ) { BLInstrumentationNode* node = (BLInstrumentationNode*)(*edge)->getSource(); - BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI(); + BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt(); // Find the first function call while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call ) @@ -1289,7 +1288,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, // Should we store the information in an array or hash if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) { - const Type* t = ArrayType::get(Type::getInt32Ty(*Context), + Type* t = ArrayType::get(Type::getInt32Ty(*Context), dag.getNumberOfPaths()); dag.setCounterArray(new GlobalVariable(M, t, false, @@ -1301,7 +1300,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, // Add to global function reference table unsigned type; - const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context); + Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context); if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) type = ProfilingArray; @@ -1315,7 +1314,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) : Constant::getNullValue(voidPtr); - const StructType* at = ftEntryTypeBuilder::get(*Context); + StructType* at = ftEntryTypeBuilder::get(*Context); ConstantStruct* functionEntry = (ConstantStruct*)ConstantStruct::get(at, entryArray); ftInit.push_back(functionEntry); @@ -1379,8 +1378,8 @@ bool PathProfiler::runOnModule(Module &M) { runOnFunction(ftInit, *F, M); } - const Type *t = ftEntryTypeBuilder::get(*Context); - const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size()); + Type *t = ftEntryTypeBuilder::get(*Context); + ArrayType* ftArrayType = ArrayType::get(t, ftInit.size()); Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit); DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n"); @@ -1388,7 +1387,7 @@ bool PathProfiler::runOnModule(Module &M) { GlobalVariable* functionTable = new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage, ftInitConstant, "functionPathTable"); - const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); + Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable, PointerType::getUnqual(eltType)); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 445a5b6..de57cd1 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -25,9 +25,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, GlobalValue *Array, PointerType *arrayType) { LLVMContext &Context = MainFn->getContext(); - const Type *ArgVTy = + Type *ArgVTy = PointerType::getUnqual(Type::getInt8PtrTy(Context)); - const PointerType *UIntPtr = arrayType ? arrayType : + PointerType *UIntPtr = arrayType ? 
arrayType : Type::getInt32PtrTy(Context); Module &M = *MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), @@ -51,8 +51,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, Constant::getNullValue(Type::getInt32Ty(Context))); unsigned NumElements = 0; if (Array) { - Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], - GEPIndices.size()); + Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices); NumElements = cast<ArrayType>(Array->getType()->getElementType())->getNumElements(); } else { @@ -108,7 +107,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() : + BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() : BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -120,7 +119,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, Indices); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -137,7 +136,7 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { Type::getInt32Ty(Mod->getContext()), FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() }; - const StructType *GlobalDtorElemTy = + StructType *GlobalDtorElemTy = StructType::get(Mod->getContext(), GlobalDtorElems, false); // Construct the new element we'll be adding. diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp index a5adb5e..ba214d1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -57,6 +57,7 @@ bool ADCE::runOnFunction(Function& F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isa<TerminatorInst>(I.getInstructionIterator()) || isa<DbgInfoIntrinsic>(I.getInstructionIterator()) || + isa<LandingPadInst>(I.getInstructionIterator()) || I->mayHaveSideEffects()) { alive.insert(I.getInstructionIterator()); worklist.push_back(I.getInstructionIterator()); @@ -65,7 +66,6 @@ bool ADCE::runOnFunction(Function& F) { // Propagate liveness backwards to operands. 
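Editor's aside on the ADCE hunk above (which adds LandingPadInst to the set of root instructions): the pass is a standard backwards-liveness worklist. A minimal standalone sketch of that scheme, using toy node types rather than LLVM's Instruction class:

#include <cstddef>
#include <set>
#include <vector>

struct Node { std::vector<Node*> operands; };

// Roots (terminators, side-effecting instructions, and now landingpads) are
// marked alive up front; liveness then propagates backwards through operands.
std::set<Node*> computeAlive(const std::vector<Node*> &roots) {
  std::set<Node*> alive(roots.begin(), roots.end());
  std::vector<Node*> worklist(roots.begin(), roots.end());
  while (!worklist.empty()) {
    Node *curr = worklist.back();
    worklist.pop_back();
    for (size_t i = 0; i != curr->operands.size(); ++i)
      if (alive.insert(curr->operands[i]).second)  // newly discovered live value
        worklist.push_back(curr->operands[i]);
  }
  return alive;
}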
while (!worklist.empty()) { Instruction* curr = worklist.pop_back_val(); - for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end(); OI != OE; ++OI) if (Instruction* Inst = dyn_cast<Instruction>(OI)) diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 0af14ed..f8f18b2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -58,6 +58,7 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -104,12 +105,13 @@ namespace { void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB); bool OptimizeInst(Instruction *I); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); bool DupRetToEnableTailCallOpts(ReturnInst *RI); + bool PlaceDbgValues(Function &F); }; } @@ -132,6 +134,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); + // llvm.dbg.value is far away from the value then iSel may not be able + // handle it properly. iSel will drop llvm.dbg.value if it can not + // find a node corresponding to the value. + EverMadeChange |= PlaceDbgValues(F); + bool MadeChange = true; while (MadeChange) { MadeChange = false; @@ -410,8 +417,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ CastInst *&InsertedCast = InsertedCasts[UserBB]; if (!InsertedCast) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); @@ -467,8 +473,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst *&InsertedCmp = InsertedCmps[UserBB]; if (!InsertedCmp) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCmp = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), @@ -528,7 +533,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); - const Type *ReturnTy = CI->getType(); + Type *ReturnTy = CI->getType(); Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); // Substituting this can cause recursive simplifications, which can @@ -551,22 +556,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // From here on out we're working with named functions. if (CI->getCalledFunction() == 0) return false; - // llvm.dbg.value is far away from the value then iSel may not be able - // handle it properly. iSel will drop llvm.dbg.value if it can not - // find a node corresponding to the value. 
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI)) - if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue())) - if (!VI->isTerminator() && - (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) { - DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(VI->getParent()->getFirstNonPHI()); - else - DVI->insertAfter(VI); - return true; - } - // We'll need TargetData from here on out. const TargetData *TD = TLI ? TLI->getTargetData() : 0; if (!TD) return false; @@ -724,7 +713,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - const Type *AccessTy) { + Type *AccessTy) { Value *Repl = Addr; // Try to collapse single-value PHI nodes. This is necessary to undo @@ -746,13 +735,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, worklist.pop_back(); // Break use-def graph loops. - if (Visited.count(V)) { + if (!Visited.insert(V)) { Consensus = 0; break; } - Visited.insert(V); - // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast<PHINode>(V)) { for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) @@ -763,7 +750,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = - AddressingModeMatcher::Match(V, AccessTy,MemoryInst, + AddressingModeMatcher::Match(V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI); // This check is broken into two cases with very similar code to avoid using @@ -822,7 +809,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Insert this computation right after this user. Since our caller is // scanning from the top of the BB to the bottom, reuse of the expr are // guaranteed to happen later. - BasicBlock::iterator InsertPt = MemoryInst; + IRBuilder<> Builder(MemoryInst); // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. 
Check to see if we have already @@ -833,11 +820,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) - SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); - const Type *IntPtrTy = + Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; @@ -850,10 +837,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; if (V->getType()->isPointerTy()) - V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); if (V->getType() != IntPtrTy) - V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true, - "sunkaddr", InsertPt); + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); Result = V; } @@ -863,29 +849,27 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (V->getType() == IntPtrTy) { // done. } else if (V->getType()->isPointerTy()) { - V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { - V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, - AddrMode.Scale), - "sunkaddr", InsertPt); + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } // Add in the BaseGV if present. 
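Editor's note on the OptimizeMemoryInst rewrite above: the IRBuilder calls materialise the matched addressing expression as a single integer computation. A hedged standalone sketch of that arithmetic (the field names follow ExtAddrMode, but this is an illustration, not the pass's code):

#include <cstdint>

// addr = BaseReg + ScaledReg * Scale + BaseGV + BaseOffs, computed in the
// pointer-sized integer type and then cast back to the original pointer type.
uint64_t sunkAddress(uint64_t BaseReg, uint64_t ScaledReg, uint64_t Scale,
                     uint64_t BaseGV, int64_t BaseOffs) {
  return BaseReg + ScaledReg * Scale + BaseGV + (uint64_t)BaseOffs;
}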
if (AddrMode.BaseGV) { - Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr", - InsertPt); + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -894,7 +878,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -902,7 +886,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Result == 0) SunkAddr = Constant::getNullValue(Addr->getType()); else - SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); } MemoryInst->replaceUsesOfWith(Repl, SunkAddr); @@ -1059,8 +1043,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; if (!InsertedTrunc) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); } @@ -1159,3 +1142,34 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { return MadeChange; } + +// llvm.dbg.value is far away from the value then iSel may not be able +// handle it properly. iSel will drop llvm.dbg.value if it can not +// find a node corresponding to the value. +bool CodeGenPrepare::PlaceDbgValues(Function &F) { + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + Instruction *PrevNonDbgInst = NULL; + for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { + Instruction *Insn = BI; ++BI; + DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + if (!DVI) { + PrevNonDbgInst = Insn; + continue; + } + + Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; + } + } + } + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb9b5be..a593d0f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,18 +52,18 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DominatorTree &DT = getAnalysis<DominatorTree>(); - + bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. 
if (DT.isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - + AA = 0; MD = 0; return Changed; } - + bool runOnBasicBlock(BasicBlock &BB); bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); @@ -105,34 +105,34 @@ static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, SmallPtrSet<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); --NumFastOther; - + // Before we touch this instruction, remove it from memdep! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); ++NumFastOther; - + // This instruction is dead, zap it, in stages. Start by removing it from // MemDep, which needs to know the operands and needs it to be in the // function. MD.removeInstruction(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + if (ValueSet) ValueSet->erase(DeadInst); } while (!NowDeadInsts.empty()); } @@ -159,11 +159,13 @@ static bool hasMemoryWrite(Instruction *I) { } /// getLocForWrite - Return a Location stored to by the specified instruction. +/// If isRemovable returns true, this function and getLocForRead completely +/// describe the memory operations for this instruction. static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return AA.getLocation(SI); - + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. AliasAnalysis::Location Loc = AA.getLocationForDest(MI); @@ -174,10 +176,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { return AliasAnalysis::Location(); return Loc; } - + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); if (II == 0) return AliasAnalysis::Location(); - + switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. case Intrinsic::init_trampoline: @@ -185,7 +187,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. if (AA.getTargetData() == 0) return AliasAnalysis::Location(); - + // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return AliasAnalysis::Location(II->getArgOperand(0)); @@ -198,10 +200,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// getLocForRead - Return the location read by the specified "hasMemoryWrite" /// instruction if any. -static AliasAnalysis::Location +static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { assert(hasMemoryWrite(Inst) && "Unknown instruction case"); - + // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) @@ -213,10 +215,10 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) { /// isRemovable - If the value of this instruction and the memory it writes to /// is unused, may we delete this instruction? static bool isRemovable(Instruction *I) { - // Don't remove volatile stores. + // Don't remove volatile/atomic stores. 
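Editor's aside: the switch from isVolatile() to isUnordered()/isSimple() in this file (and in EarlyCSE and GVN below) leans on a small predicate hierarchy introduced with the atomics work. A standalone sketch of that hierarchy, stated as an assumption rather than quoted from the LLVM headers:

// simple    = neither volatile nor atomic
// unordered = not volatile, and at most "unordered" atomic ordering
// Every simple access is unordered, but not vice versa.  Compared with a plain
// !isVolatile() test, isRemovable() above now also rejects ordered atomic
// stores, which the old check would have let through.
enum Ordering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                AcquireRelease, SequentiallyConsistent };

struct MemAccess {
  bool Volatile;
  Ordering Ord;
  bool isSimple()    const { return !Volatile && Ord == NotAtomic; }
  bool isUnordered() const { return !Volatile && Ord <= Unordered; }
};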
if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return !SI->isVolatile(); - + return SI->isUnordered(); + IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate"); @@ -227,7 +229,7 @@ static bool isRemovable(Instruction *I) { case Intrinsic::init_trampoline: // Always safe to remove init_trampoline. return true; - + case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: @@ -255,16 +257,16 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { const TargetData *TD = AA.getTargetData(); if (TD == 0) return AliasAnalysis::UnknownSize; - + if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { // Get size information for the alloca if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); return AliasAnalysis::UnknownSize; } - + assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); - const PointerType *PT = cast<PointerType>(V->getType()); + PointerType *PT = cast<PointerType>(V->getType()); return TD->getTypeAllocSize(PT->getElementType()); } @@ -287,7 +289,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, AliasAnalysis &AA) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); - + // If the start pointers are the same, we just have to compare sizes to see if // the later store was larger than the earlier store. if (P1 == P2) { @@ -302,33 +304,33 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, return Later.Ptr->getType() == Earlier.Ptr->getType(); return false; } - + // Make sure that the Later size is >= the Earlier size. if (Later.Size < Earlier.Size) return false; return true; } - + // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || Later.Size <= Earlier.Size || AA.getTargetData() == 0) return false; - + // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. const TargetData &TD = *AA.getTargetData(); - + const Value *UO1 = GetUnderlyingObject(P1, &TD), *UO2 = GetUnderlyingObject(P2, &TD); - + // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (UO1 != UO2) return false; - + // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = @@ -336,26 +338,26 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, if (ObjectSize == Later.Size) return true; } - + // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. int64_t EarlierOff = 0, LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); - + // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) return false; // The later store completely overlaps the earlier store if: - // + // // 1. Both start at the same offset and the later one's size is greater than // or equal to the earlier one's, or // // |--earlier--| // |-- later --| - // + // // 2. 
The earlier store has an offset greater than the later offset, but which // still lies completely within the later store. // @@ -373,7 +375,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a /// memory region into an identical pointer) then it doesn't actually make its -/// input dead in the traditional sense. Consider this case: +/// input dead in the traditional sense. Consider this case: /// /// memcpy(A <- B) /// memcpy(A <- A) @@ -391,10 +393,10 @@ static bool isPossibleSelfRead(Instruction *Inst, // location read. AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. - + // If the read and written loc obviously don't alias, it isn't a read. if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; - + // Okay, 'Inst' may copy over itself. However, we can still remove a the // DepWrite instruction if we can prove that it reads from the same location // as Inst. This handles useful cases like: @@ -404,10 +406,10 @@ static bool isPossibleSelfRead(Instruction *Inst, // aliases, so removing the first memcpy is safe (assuming it writes <= # // bytes as the second one. AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA); - + if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) return false; - + // If DepWrite doesn't read memory or if we can't prove it is a must alias, // then it can't be considered dead. return true; @@ -420,43 +422,43 @@ static bool isPossibleSelfRead(Instruction *Inst, bool DSE::runOnBasicBlock(BasicBlock &BB) { bool MadeChange = false; - + // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; - + // Handle 'free' calls specially. if (CallInst *F = isFreeCall(Inst)) { MadeChange |= HandleFree(F); continue; } - + // If we find something that writes memory, get its memory dependence. if (!hasMemoryWrite(Inst)) continue; MemDepResult InstDep = MD->getDependency(Inst); - + // Ignore any store where we can't find a local dependence. // FIXME: cross-block DSE would be fun. :) - if (InstDep.isNonLocal() || InstDep.isUnknown()) + if (!InstDep.isDef() && !InstDep.isClobber()) continue; - + // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad && !SI->isVolatile()) { + SI->getOperand(0) == DepLoad && isRemovable(SI)) { DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); - + // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - + DeleteDeadInstruction(SI, *MD); - + if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. @@ -467,15 +469,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { } } } - + // Figure out what location is being stored to. AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. 
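Editor's sketch of the offset arithmetic behind the two numbered cases in isCompleteOverwrite above, reduced to plain (offset, size) pairs; this illustrates the rule only, since the pass must also establish that both pointers share the same base:

#include <cstdint>

// The later store completely covers the earlier one iff it starts at or before
// the earlier offset and extends at least as far past the earlier store's end.
bool laterCoversEarlier(int64_t EarlierOff, uint64_t EarlierSize,
                        int64_t LaterOff, uint64_t LaterSize) {
  return LaterOff <= EarlierOff &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}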
if (Loc.Ptr == 0) continue; - - while (!InstDep.isNonLocal() && !InstDep.isUnknown()) { + + while (InstDep.isDef() || InstDep.isClobber()) { // Get the memory clobbered by the instruction we depend on. MemDep will // skip any instructions that 'Loc' clearly doesn't interact with. If we // end up depending on a may- or must-aliased load, then we can't optimize @@ -496,12 +498,12 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); - + // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - + // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. BBI = Inst; @@ -509,7 +511,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { --BBI; break; } - + // If this is a may-aliased store that is clobbering the store value, we // can keep searching past it for another must-aliased pointer that stores // to the same location. For example, in: @@ -519,20 +521,20 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // we can remove the first store to P even though we don't know if P and Q // alias. if (DepWrite == &BB.front()) break; - + // Can't look past this instruction if it might read 'Loc'. if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) break; - + InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); } } - + // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. if (BB.getTerminator()->getNumSuccessors() == 0) MadeChange |= handleEndBlock(BB); - + return MadeChange; } @@ -543,18 +545,18 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getDependency(F); - while (!Dep.isNonLocal() && !Dep.isUnknown()) { + while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) return MadeChange; - + Value *DepPointer = GetUnderlyingObject(getStoredPointerOperand(Dependency)); // Check for aliasing. if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) return MadeChange; - + // DCE instructions only used to calculate that store DeleteDeadInstruction(Dependency, *MD); ++NumFastStores; @@ -567,7 +569,7 @@ bool DSE::HandleFree(CallInst *F) { // free(s); Dep = MD->getDependency(F); }; - + return MadeChange; } @@ -579,28 +581,28 @@ bool DSE::HandleFree(CallInst *F) { /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { bool MadeChange = false; - + // Keep track of all of the stack objects that are dead at the end of the // function. SmallPtrSet<Value*, 16> DeadStackObjects; - + // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) DeadStackObjects.insert(AI); - + // Treat byval arguments the same, stores to them are dead at the end of the // function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValAttr()) DeadStackObjects.insert(AI); - + // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; - + // If we find a store, check to see if it points into a dead stack value. 
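Editor's example of the situation handleEndBlock targets (illustrative source-level C++, not taken from the patch): a store into a non-escaping local that is never read again before the function returns is dead at the end of the block.

int lastUse(int x) {
  int buf[4];
  buf[0] = x;     // dead: buf does not escape and is never read after this point
  return x + 1;   // allocas like buf are "DeadStackObjects" at the return
}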
if (hasMemoryWrite(BBI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts @@ -609,10 +611,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Stores to stack values are valid candidates for removal. if (DeadStackObjects.count(Pointer)) { Instruction *Dead = BBI++; - + DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Object: " << *Pointer << '\n'); - + // DCE instructions only used to calculate that store. DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); ++NumFastStores; @@ -620,7 +622,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } } - + // Remove any dead non-memory-mutating instructions. if (isInstructionTriviallyDead(BBI)) { Instruction *Inst = BBI++; @@ -629,55 +631,61 @@ bool DSE::handleEndBlock(BasicBlock &BB) { MadeChange = true; continue; } - + if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { DeadStackObjects.erase(A); continue; } - + if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) continue; - + // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the call site touches it. - AliasAnalysis::ModRefResult A = + AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - + if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } - + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), E = LiveAllocas.end(); I != E; ++I) DeadStackObjects.erase(*I); - + // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) return MadeChange; - + continue; } - + AliasAnalysis::Location LoadedLoc; - + // If we encounter a use of the pointer, it is no longer considered dead if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { + if (!L->isUnordered()) // Be conservative with atomic/volatile load + break; LoadedLoc = AA->getLocation(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { LoadedLoc = AA->getLocation(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { LoadedLoc = AA->getLocationForSource(MTI); - } else { - // Not a loading instruction. + } else if (!BBI->mayReadFromMemory()) { + // Instruction doesn't read memory. Note that stores that weren't removed + // above will hit this case. continue; + } else { + // Unknown inst; assume it clobbers everything. + break; } // Remove any allocas from the DeadPointer set that are loaded, as this @@ -689,7 +697,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (DeadStackObjects.empty()) break; } - + return MadeChange; } @@ -703,14 +711,14 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // A constant can't be in the dead pointer set. if (isa<Constant>(UnderlyingPointer)) return; - + // If the kill pointer can be easily reduced to an alloca, don't bother doing // extraneous AA queries. 
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer)); return; } - + SmallVector<Value*, 16> NowLive; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 3d3f17b..c0223d2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -92,7 +92,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { // Hash in all of the operands as pointers. unsigned Res = 0; for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) - Res ^= getHash(Inst->getOperand(i)) << i; + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); if (CastInst *CI = dyn_cast<CastInst>(Inst)) Res ^= getHash(CI->getType()); @@ -185,7 +185,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { assert(!Inst->getOperand(i)->getType()->isMetadataTy() && "Cannot value number calls with metadata operands"); - Res ^= getHash(Inst->getOperand(i)) << i; + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); } // Mix in the opcode. @@ -357,7 +357,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If this is a non-volatile load, process it. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { // Ignore volatile loads. - if (LI->isVolatile()) { + if (!LI->isSimple()) { LastStore = 0; continue; } @@ -437,7 +437,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); // Remember that this was the last store we saw for DSE. - if (!SI->isVolatile()) + if (SI->isSimple()) LastStore = SI; } } diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 87b7317..cbfdbcd 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -41,12 +41,16 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); STATISTIC(NumGVNBlocks, "Number of blocks merged"); +STATISTIC(NumGVNSimpl, "Number of instructions simplified"); +STATISTIC(NumGVNEqProp, "Number of equalities propagated"); STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", @@ -63,7 +67,7 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type *type; + Type *type; SmallVector<uint32_t, 4> varargs; Expression(uint32_t o = ~2U) : opcode(o) { } @@ -548,6 +552,9 @@ namespace { void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); + unsigned replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root); + bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); }; char GVN::ID = 0; @@ -655,7 +662,7 @@ SpeculationFailure: /// CanCoerceMustAliasedValueToLoad - Return true if /// CoerceAvailableValueToLoadType will succeed. 
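Editor's note on the EarlyCSE hashing hunks above (the change from "<< i" to "<< (i & 0xF)"): shifting a 32-bit value by 32 or more positions is undefined behaviour in C++, and even legal large shifts would push an operand's contribution out of the hash entirely, so calls or instructions with many operands need the shift clamped. A standalone sketch:

#include <vector>

unsigned hashOperands(const std::vector<unsigned> &opHashes) {
  unsigned res = 0;
  for (unsigned i = 0, e = opHashes.size(); i != e; ++i)
    res ^= opHashes[i] << (i & 0xF);   // shift amount stays in [0, 15]
  return res;
}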
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, - const Type *LoadTy, + Type *LoadTy, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. @@ -680,17 +687,17 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, /// /// If we can't do it, return null. static Value *CoerceAvailableValueToLoadType(Value *StoredVal, - const Type *LoadedTy, + Type *LoadedTy, Instruction *InsertPt, const TargetData &TD) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; // If this is already the right type, just return it. - const Type *StoredValTy = StoredVal->getType(); + Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { @@ -704,7 +711,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - const Type *TypeToCastTo = LoadedTy; + Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); @@ -743,7 +750,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, } // Truncate the integer to the right size now. - const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); if (LoadedTy == NewIntTy) @@ -765,7 +772,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, /// Check this case to see if there is anything more we can do before we give /// up. This returns -1 if we have to give up, or a byte number in the stored /// value of the piece that feeds the load. -static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, Value *WritePtr, uint64_t WriteSizeInBits, const TargetData &TD) { @@ -839,7 +846,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, /// AnalyzeLoadFromClobberingStore - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. -static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. @@ -856,7 +863,7 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, /// AnalyzeLoadFromClobberingLoad - This function is called when we have a /// memdep query of a load that ends up being clobbered by another load. See if /// the other load can feed into the second load. -static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, const TargetData &TD){ // Cannot handle reading from store of first-class aggregate yet. 
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) @@ -883,7 +890,7 @@ static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, -static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { // If the mem operation is a non-constant size, we can't handle it. @@ -920,7 +927,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); if (ConstantFoldLoadFromConstPtr(Src, &TD)) return Offset; @@ -934,7 +941,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// mustalias. Check this case to see if there is anything more we can do /// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, - const Type *LoadTy, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); @@ -946,10 +953,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPointerTy()) - SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), - "tmp"); + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; @@ -959,11 +965,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, ShiftAmt = (StoreSize-LoadSize-Offset)*8; if (ShiftAmt) - SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); if (LoadSize != StoreSize) - SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), - "tmp"); + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } @@ -974,7 +979,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, /// because the pointers don't mustalias. Check this case to see if there is /// anything more we can do before we give up. 
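Editor's sketch of the shift-and-truncate arithmetic GetStoreValueForLoad performs above once both values have been converted to integers (sizes in bytes; endianness handled as in the pass; this is an illustration, not the pass's code):

#include <cstdint>

uint64_t extractLoadedBits(uint64_t storedBits, unsigned storeSize,
                           unsigned loadSize, unsigned offset,
                           bool littleEndian) {
  // On little-endian targets the wanted bytes start offset*8 bits from the
  // LSB; on big-endian they start (storeSize - loadSize - offset)*8 bits in.
  unsigned shiftAmt = littleEndian ? offset * 8
                                   : (storeSize - loadSize - offset) * 8;
  uint64_t val = storedBits >> shiftAmt;
  if (loadSize < 8)                                  // truncate to loaded width
    val &= ((uint64_t)1 << (loadSize * 8)) - 1;
  return val;
}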
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, - const Type *LoadTy, Instruction *InsertPt, + Type *LoadTy, Instruction *InsertPt, GVN &gvn) { const TargetData &TD = *gvn.getTargetData(); // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to @@ -982,8 +987,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); if (Offset+LoadSize > SrcValSize) { - assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); - assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); // If we have a load/load clobber an DepLI can be widened to cover this // load, then we should widen it to the next power of 2 size big enough! unsigned NewLoadSize = Offset+LoadSize; @@ -996,7 +1001,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, // memdep queries will find the new load. We can't easily remove the old // load completely because it is already in the value numbering table. IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - const Type *DestPTy = + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); DestPTy = PointerType::get(DestPTy, cast<PointerType>(PtrVal->getType())->getAddressSpace()); @@ -1034,7 +1039,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - const Type *LoadTy, Instruction *InsertPt, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = LoadTy->getContext(); uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; @@ -1081,7 +1086,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); return ConstantFoldLoadFromConstPtr(Src, &TD); } @@ -1154,7 +1159,7 @@ struct AvailableValueInBlock { /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); @@ -1213,7 +1218,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - const Type *LoadTy = LI->getType(); + Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; @@ -1274,7 +1279,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. 
- if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) { + if (Deps.size() == 1 + && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) + { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1294,7 +1301,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); - if (DepInfo.isUnknown()) { + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1359,7 +1366,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { continue; } - assert(DepInfo.isDef() && "Expecting def here"); + // DepInfo.isDef() here Instruction *DepInst = DepInfo.getInst(); @@ -1446,8 +1453,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); - // Lets find first basic block with more than one predecessor. Walk backwards - // through predecessors if needed. + // Let's find the first basic block with more than one predecessor. Walk + // backwards through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; @@ -1519,10 +1526,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { << Pred->getName() << "': " << *LI << '\n'); return false; } + + if (LoadBB->isLandingPad()) { + DEBUG(dbgs() + << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); } } + if (!NeedToSplit.empty()) { toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; @@ -1660,7 +1676,7 @@ bool GVN::processLoad(LoadInst *L) { if (!MD) return false; - if (L->isVolatile()) + if (!L->isSimple()) return false; if (L->use_empty()) { @@ -1747,7 +1763,11 @@ bool GVN::processLoad(LoadInst *L) { return false; } - if (Dep.isUnknown()) { + // If it is defined in another block, try harder. + if (Dep.isNonLocal()) + return processNonLocalLoad(L); + + if (!Dep.isDef()) { DEBUG( // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; @@ -1757,12 +1777,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } - // If it is defined in another block, try harder. - if (Dep.isNonLocal()) - return processNonLocalLoad(L); - - assert(Dep.isDef() && "Expecting def here"); - Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { Value *StoredVal = DepSI->getValueOperand(); @@ -1874,6 +1888,133 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { return Val; } +/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the +/// use is dominated by the given basic block. Returns the number of uses that +/// were replaced. +unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root) { + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ) { + Instruction *User = cast<Instruction>(*UI); + unsigned OpNum = UI.getOperandNo(); + ++UI; + + if (DT->dominates(Root, User->getParent())) { + User->setOperand(OpNum, To); + ++Count; + } + } + return Count; +} + +/// propagateEquality - The given values are known to be equal in every block +/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with +/// 'RHS' everywhere in the scope. Returns whether a change was made. 
+bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { + if (LHS == RHS) return false; + assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); + + // Don't try to propagate equalities between constants. + if (isa<Constant>(LHS) && isa<Constant>(RHS)) + return false; + + // Make sure that any constants are on the right-hand side. In general the + // best results are obtained by placing the longest lived value on the RHS. + if (isa<Constant>(LHS)) + std::swap(LHS, RHS); + + // If neither term is constant then bail out. This is not for correctness, + // it's just that the non-constant case is much less useful: it occurs just + // as often as the constant case but handling it hardly ever results in an + // improvement. + if (!isa<Constant>(RHS)) + return false; + + // If value numbering later deduces that an instruction in the scope is equal + // to 'LHS' then ensure it will be turned into 'RHS'. + addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root); + + // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. + unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); + bool Changed = NumReplacements > 0; + NumGVNEqProp += NumReplacements; + + // Now try to deduce additional equalities from this one. For example, if the + // known equality was "(A != B)" == "false" then it follows that A and B are + // equal in the scope. Only boolean equalities with an explicit true or false + // RHS are currently supported. + if (!RHS->getType()->isIntegerTy(1)) + // Not a boolean equality - bail out. + return Changed; + ConstantInt *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + // RHS neither 'true' nor 'false' - bail out. + return Changed; + // Whether RHS equals 'true'. Otherwise it equals 'false'. + bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownFalse = !isKnownTrue; + + // If "A && B" is known true then both A and B are known true. If "A || B" + // is known false then both A and B are known false. + Value *A, *B; + if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || + (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { + Changed |= propagateEquality(A, RHS, Root); + Changed |= propagateEquality(B, RHS, Root); + return Changed; + } + + // If we are propagating an equality like "(A == B)" == "true" then also + // propagate the equality A == B. + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { + // Only equality comparisons are supported. + if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || + (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) { + Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); + Changed |= propagateEquality(Op0, Op1, Root); + } + return Changed; + } + + return Changed; +} + +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, + DominatorTree *DT) { + // First off, there must not be more than one edge from Src to Dst, there + // should be exactly one. So keep track of the number of times Src occurs + // as a predecessor of Dst and fail if it's more than once. Secondly, any + // other predecessors of Dst should be dominated by Dst (see logic below). 
+ bool SawEdgeFromSrc = false; + for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + if (Pred == Src) { + // An edge from Src to Dst. + if (SawEdgeFromSrc) + // There are multiple edges from Src to Dst - fail. + return false; + SawEdgeFromSrc = true; + continue; + } + // If the predecessor is not dominated by Dst, then it must be possible to + // reach it either without passing through Src (and thus not via the edge) + // or by passing through Src but taking a different edge out of Src. Either + // way it is possible to reach Dst without passing via the edge, so fail. + if (!DT->dominates(Dst, *PI)) + return false; + } + assert(SawEdgeFromSrc && "No edge between these basic blocks!"); + + // Every path from the entry block to Dst must at some point pass to Dst from + // a predecessor that is not dominated by Dst. This predecessor can only be + // Src, since all others are dominated by Dst. As there is only one edge from + // Src to Dst, the path passes by this edge. + return true; +} /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets @@ -1891,6 +2032,7 @@ bool GVN::processInstruction(Instruction *I) { if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(I); + ++NumGVNSimpl; return true; } @@ -1903,30 +2045,45 @@ bool GVN::processInstruction(Instruction *I) { return false; } - // For conditions branches, we can perform simple conditional propagation on + // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast<BranchInst>(I)) { if (!BI->isConditional() || isa<Constant>(BI->getCondition())) return false; - + Value *BranchCond = BI->getCondition(); - uint32_t CondVN = VN.lookup_or_add(BranchCond); - + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); - - if (TrueSucc->getSinglePredecessor()) - addToLeaderTable(CondVN, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); - if (FalseSucc->getSinglePredecessor()) - addToLeaderTable(CondVN, - ConstantInt::getFalse(TrueSucc->getContext()), - FalseSucc); - - return false; + BasicBlock *Parent = BI->getParent(); + bool Changed = false; + + if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getTrue(TrueSucc->getContext()), + TrueSucc); + + if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getFalse(FalseSucc->getContext()), + FalseSucc); + + return Changed; } - + + // For switches, propagate the case values into the case destinations. + if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { + Value *SwitchCond = SI->getCondition(); + BasicBlock *Parent = SI->getParent(); + bool Changed = false; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + BasicBlock *Dst = SI->getSuccessor(i); + if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) + Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst); + } + return Changed; + } + // Instructions with void type don't return a value, so there's // no point in trying to find redudancies in them. if (I->getType()->isVoidTy()) return false; @@ -2071,6 +2228,9 @@ bool GVN::performPRE(Function &F) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; + // Don't perform PRE on a landing pad. 
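Editor's sketch of the CFG test that isOnlyReachableViaThisEdge above relies on, over toy blocks and with the dominator query passed in as a function pointer (an assumption of this illustration, not the pass's interface):

#include <cstddef>
#include <vector>

struct Block { std::vector<Block*> preds; };

// Every path from entry to Dst passes through the single Src->Dst edge iff
// Src appears exactly once among Dst's predecessors and every other
// predecessor of Dst is dominated by Dst (i.e. only reachable through Dst).
bool onlyReachableViaEdge(Block *Src, Block *Dst,
                          bool (*dominates)(Block *A, Block *B)) {
  bool sawEdgeFromSrc = false;
  for (size_t i = 0; i != Dst->preds.size(); ++i) {
    Block *Pred = Dst->preds[i];
    if (Pred == Src) {
      if (sawEdgeFromSrc)
        return false;              // more than one edge from Src to Dst
      sawEdgeFromSrc = true;
      continue;
    }
    if (!dominates(Dst, Pred))
      return false;                // Dst is reachable without taking the edge
  }
  return sawEdgeFromSrc;
}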
+ if (CurrentBlock->isLandingPad()) continue; + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index dee3d38..75fa011 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -11,17 +11,6 @@ // computations derived from them) into simpler forms suitable for subsequent // analysis and transformation. // -// This transformation makes the following changes to each loop with an -// identifiable induction variable: -// 1. All loops are transformed to have a SINGLE canonical induction variable -// which starts at zero and steps by one. -// 2. The canonical induction variable is guaranteed to be the first PHI node -// in the loop header block. -// 3. The canonical induction variable is guaranteed to be in a wide enough -// type so that IV expressions need not be (directly) zero-extended or -// sign-extended. -// 4. Any pointer arithmetic recurrences are raised to use array subscripts. -// // If the trip count of a loop is computable, this pass also makes the following // changes: // 1. The exit condition for the loop is canonicalized to compare the @@ -33,9 +22,6 @@ // purpose of the loop is to compute the exit value of some derived // expression, this transformation will make the loop dead. // -// This transformation should be followed by strength reduction after all of the -// desired loop transformations have been performed. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "indvars" @@ -57,11 +43,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); @@ -69,15 +55,21 @@ STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumInserted , "Number of canonical indvars added"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); -STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); -STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); -STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -static cl::opt<bool> DisableIVRewrite( - "disable-iv-rewrite", cl::Hidden, - cl::desc("Disable canonical induction variable rewriting")); +namespace llvm { + cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + + // Trip count verification can be enabled by default under NDEBUG if we + // implement a strong expression equivalence checker in SCEV. Until then, we + // use the verify-indvars flag, which may assert in some cases. 
+ cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); +} namespace { class IndVarSimplify : public LoopPass { @@ -105,12 +97,12 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - if (!DisableIVRewrite) + if (EnableIVRewrite) AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - if (!DisableIVRewrite) + if (EnableIVRewrite) AU.addPreserved<IVUsers>(); AU.setPreservesCFG(); } @@ -125,24 +117,14 @@ namespace { void HandleFloatingPointIV(Loop *L, PHINode *PH); void RewriteNonIntegerIVs(Loop *L); - void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - - void SimplifyIVUsers(SCEVExpander &Rewriter); - void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter); + void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); - bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand); - void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); - void EliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned); - - void SimplifyCongruentIVs(Loop *L); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter); + Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); }; @@ -211,6 +193,36 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { return true; } +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT) { + PHINode *PHI = dyn_cast<PHINode>(User); + if (!PHI) + return User; + + Instruction *InsertPt = 0; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + assert(InsertPt && "Missing phi operand"); + assert((!isa<Instruction>(Def) || + DT->dominates(cast<Instruction>(Def), InsertPt)) && + "def does not dominate all uses"); + return InsertPt; +} + //===----------------------------------------------------------------------===// // RewriteNonIntegerIVs and helpers. Prefer integer IVs. //===----------------------------------------------------------------------===// @@ -337,14 +349,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Positive and negative strides have different safety conditions. if (IncValue > 0) { // If we have a positive stride, we require the init to be less than the - // exit value and an equality or less than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) + // exit value. 
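// Illustrative sketch, not from the patch: the kind of loop that
// HandleFloatingPointIV rewrites. The float counter starts, steps and compares
// against small exact integers, so it can be replaced by an i32 counter with a
// single int-to-float conversion at the point where the float value is actually
// needed. Function names are invented for the example.
void fp_iv_before(float *out) {
  for (float f = 0.0f; f < 100.0f; f += 1.0f)   // float induction variable
    out[(int)f] = f;
}
void fp_iv_after(float *out) {
  for (int i = 0; i < 100; ++i)                 // equivalent integer IV
    out[i] = (float)i;                          // conversion only at the use
}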
+ if (InitValue >= ExitValue) return; uint32_t Range = uint32_t(ExitValue-InitValue); - if (NewPred == CmpInst::ICMP_SLE) { - // Normalize SLE -> SLT, check for infinite loop. + // Check for infinite loop, either: + // while (i <= Exit) or until (i > Exit) + if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { if (++Range == 0) return; // Range overflows. } @@ -364,14 +376,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { } else { // If we have a negative stride, we require the init to be greater than the - // exit value and an equality or greater than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) + // exit value. + if (InitValue <= ExitValue) return; uint32_t Range = uint32_t(InitValue-ExitValue); - if (NewPred == CmpInst::ICMP_SGE) { - // Normalize SGE -> SGT, check for infinite loop. + // Check for infinite loop, either: + // while (i >= Exit) or until (i < Exit) + if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { if (++Range == 0) return; // Range overflows. } @@ -390,7 +402,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { return; } - const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); @@ -429,7 +441,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstNonPHI()); + PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN); } @@ -437,6 +449,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Add a new IVUsers entry for the newly-created integer PHI. if (IU) IU->AddUsersIfInteresting(NewPHI); + + Changed = true; } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { @@ -582,45 +596,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { //===----------------------------------------------------------------------===// // Rewrite IV users based on a canonical IV. -// To be replaced by -disable-iv-rewrite. +// Only for use with -enable-iv-rewrite. //===----------------------------------------------------------------------===// -/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this -/// loop. IVUsers is treated as a worklist. Each successive simplification may -/// push more users which may themselves be candidates for simplification. -/// -/// This is the old approach to IV simplification to be replaced by -/// SimplifyIVUsersNoRewrite. -/// -void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { - // Each round of simplification involves a round of eliminating operations - // followed by a round of widening IVs. A single IVUsers worklist is used - // across all rounds. The inner loop advances the user. If widening exposes - // more uses, then another pass through the outer loop is triggered. 
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { - Instruction *UseInst = I->getUser(); - Value *IVOperand = I->getOperandValToReplace(); - - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - EliminateIVComparison(ICmp, IVOperand); - continue; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - EliminateIVRemainder(Rem, IVOperand, IsSigned); - continue; - } - } - } -} - -// FIXME: It is an extremely bad idea to indvar substitute anything more -// complex than affine induction variables. Doing so will put expensive -// polynomial evaluations inside of the loop, and the str reduction pass -// currently can only reduce affine polynomials. For now just disable -// indvar subst on anything more complex than an affine addrec, unless -// it can be expanded to a trivial value. +/// FIXME: It is an extremely bad idea to indvar substitute anything more +/// complex than affine induction variables. Doing so will put expensive +/// polynomial evaluations inside of the loop, and the str reduction pass +/// currently can only reduce affine polynomials. For now just disable +/// indvar subst on anything more complex than an affine addrec, unless +/// it can be expanded to a trivial value. static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { // Loop-invariant values are safe. if (SE->isLoopInvariant(S, L)) return true; @@ -631,7 +615,8 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { return AR->isAffine(); // An add is safe it all its operands are safe. - if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + if (const SCEVCommutativeExpr *Commutative + = dyn_cast<SCEVCommutativeExpr>(S)) { for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), E = Commutative->op_end(); I != E; ++I) if (!isSafe(*I, L, SE)) return false; @@ -665,7 +650,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // of different sizes. for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); + Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); // Compute the final addrec to expand into code. @@ -692,18 +677,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // hoist loop invariants out of the loop. For PHI nodes, there may be // multiple uses, so compute the nearest common dominator for the // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } + Instruction *InsertPt = getInsertPointForUses(User, Op, DT); // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); @@ -747,19 +721,38 @@ namespace { // extend operations. This information is recorded by CollectExtend and // provides the input to WidenIV. struct WideIVInfo { - const Type *WidestNativeType; // Widest integer type created [sz]ext - bool IsSigned; // Was an sext user seen before a zext? 
+ PHINode *NarrowIV; + Type *WidestNativeType; // Widest integer type created [sz]ext + bool IsSigned; // Was an sext user seen before a zext? - WideIVInfo() : WidestNativeType(0), IsSigned(false) {} + WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + }; + + class WideIVVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetData *TD; + + public: + WideIVInfo WI; + + WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV, + const TargetData *TData) : + SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; } + + // Implement the interface used by simplifyUsersOfIV. + virtual void visitCast(CastInst *Cast); }; } -/// CollectExtend - Update information about the induction variable that is +/// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. -static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, - ScalarEvolution *SE, const TargetData *TD) { - const Type *Ty = Cast->getType(); +void WideIVVisitor::visitCast(CastInst *Cast) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) + return; + + Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); if (TD && !TD->isLegalInteger(Width)) return; @@ -779,6 +772,21 @@ static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, } namespace { + +/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the +/// WideIV that computes the same value as the Narrow IV def. This avoids +/// caching Use* pointers. +struct NarrowIVDefUse { + Instruction *NarrowDef; + Instruction *NarrowUse; + Instruction *WideDef; + + NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): + NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} +}; + /// WidenIV - The goal of this transform is to remove sign and zero extends /// without creating any new induction variables. 
To do this, it creates a new /// phi of the wider type and redirects all users, either removing extends or @@ -787,7 +795,7 @@ namespace { class WidenIV { // Parameters PHINode *OrigPhi; - const Type *WideType; + Type *WideType; bool IsSigned; // Context @@ -803,13 +811,13 @@ class WidenIV { SmallVectorImpl<WeakVH> &DeadInsts; SmallPtrSet<Instruction*,16> Widened; - SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; public: - WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo, + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl<WeakVH> &DI) : - OrigPhi(PN), + OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), IsSigned(WI.IsSigned), LI(LInfo), @@ -826,21 +834,42 @@ public: PHINode *CreateWideIV(SCEVExpander &Rewriter); protected: - Instruction *CloneIVUser(Instruction *NarrowUse, - Instruction *NarrowDef, - Instruction *WideDef); + Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *CloneIVUser(NarrowIVDefUse DU); const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, - Instruction *WideDef); + const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + + Instruction *WidenIVUse(NarrowIVDefUse DU); void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace -static Value *getExtend( Value *NarrowOper, const Type *WideType, - bool IsSigned, IRBuilder<> &Builder) { +/// isLoopInvariant - Perform a quick domtree based check for loop invariance +/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems +/// gratuitous for this purpose. +static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { + Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) + return true; + + return DT->properlyDominates(Inst->getParent(), L->getHeader()); +} + +Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : Builder.CreateZExt(NarrowOper, WideType); } @@ -848,10 +877,8 @@ static Value *getExtend( Value *NarrowOper, const Type *WideType, /// CloneIVUser - Instantiate a wide operation to replace a narrow /// operation. This only needs to handle operations that can evaluation to /// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. 
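// Illustrative sketch, not from the patch: what IV widening buys on an LP64
// target. The 32-bit counter below is sign-extended on every iteration to form
// the 64-bit address; creating one 64-bit phi up front lets the per-iteration
// sext disappear from the loop body. Function names are invented.
long sum32(const long *a, int n) {
  long s = 0;
  for (int i = 0; i < n; ++i)           // i is sign-extended to index 'a'
    s += a[i];
  return s;
}
long sum64(const long *a, int n) {
  long s = 0;
  for (long i = 0, e = n; i < e; ++i)   // widened IV: no sext inside the loop
    s += a[i];
  return s;
}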
-Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, - Instruction *NarrowDef, - Instruction *WideDef) { - unsigned Opcode = NarrowUse->getOpcode(); +Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { + unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: return 0; @@ -865,24 +892,23 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); - - IRBuilder<> Builder(NarrowUse); + DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n"); // Replace NarrowDef operands with WideDef. Otherwise, we don't know // anything about the narrow operand yet so must insert a [sz]ext. It is // probably loop invariant and will be folded or hoisted. If it actually // comes from a widened IV, it should be removed during a future call to // WidenIVUse. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : - getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : - getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); + Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef : + getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse); + Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef : + getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse); - BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); + BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse); BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, NarrowBO->getName()); + IRBuilder<> Builder(DU.NarrowUse); Builder.Insert(WideBO); if (const OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { @@ -894,45 +920,46 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, llvm_unreachable(0); } -/// HoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -static bool HoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) -{ - if (DT->dominates(IncV, InsertPos)) - return true; +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. If the expression after extending the +/// operands is an AddRec for this loop, return it. +const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { + // Handle the common case of add<nsw/nuw> + if (DU.NarrowUse->getOpcode() != Instruction::Add) + return 0; - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; + // One operand (NarrowDef) has already been extended to WideDef. Now determine + // if extending the other will lead to a recurrence. + unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 
1 : 0; + assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); + + const SCEV *ExtendOperExpr = 0; + const OverflowingBinaryOperator *OBO = + cast<OverflowingBinaryOperator>(DU.NarrowUse); + if (IsSigned && OBO->hasNoSignedWrap()) + ExtendOperExpr = SE->getSignExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else if(!IsSigned && OBO->hasNoUnsignedWrap()) + ExtendOperExpr = SE->getZeroExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else + return 0; - if (IncV->mayHaveSideEffects()) - return false; + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, + IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast<Instruction>(OI); - if (OInst && !DT->dominates(OInst, InsertPos)) - return false; - } - IncV->moveBefore(InsertPos); - return true; + if (!AddRec || AddRec->getLoop() != L) + return 0; + return AddRec; } -// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' -// perspective after widening it's type? In other words, can the extend be -// safely hoisted out of the loop with SCEV reducing the value to a recurrence -// on the same loop. If so, return the sign or zero extended -// recurrence. Otherwise return NULL. +/// GetWideRecurrence - Is this instruction potentially interesting from +/// IVUsers' perspective after widening it's type? In other words, can the +/// extend be safely hoisted out of the loop with SCEV reducing the value to a +/// recurrence on the same loop. If so, return the sign or zero extended +/// recurrence. Otherwise return NULL. const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) return 0; @@ -951,47 +978,45 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) return 0; - return AddRec; } /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, - Instruction *WideDef) { - Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser()); +Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. - if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) + if (isa<PHINode>(DU.NarrowUse) && + LI->getLoopFor(DU.NarrowUse->getParent()) != L) return 0; // Our raison d'etre! Eliminate sign and zero extension. - if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { - Value *NewDef = WideDef; - if (NarrowUse->getType() != WideType) { - unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); + if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); if (CastWidth < IVWidth) { // The cast isn't as wide as the IV, so insert a Trunc. 
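// Illustrative sketch, not from the patch: the algebraic fact that
// GetExtendedOperandRecurrence relies on. When an add is known not to wrap in
// the signed sense (nsw), sign extension distributes over its operands, which
// is what lets the widened IV absorb the extension of the other operand. A
// small runtime check of the 32 -> 64 bit identity, with an invented name:
#include <cassert>
#include <cstdint>
void check_nsw_sext(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a + (int64_t)b;
  // Precondition (the "nsw" assumption): the narrow add does not overflow.
  assert(wide >= INT32_MIN && wide <= INT32_MAX);
  assert((int64_t)(a + b) == wide);   // sext(a + b) == sext(a) + sext(b)
}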
- IRBuilder<> Builder(NarrowDefUse); - NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); } else { // A wider extend was hidden behind a narrower one. This may induce // another round of IV widening in which the intermediate IV becomes // dead. It should be very rare. DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi - << " not wide enough to subsume " << *NarrowUse << "\n"); - NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); - NewDef = NarrowUse; + << " not wide enough to subsume " << *DU.NarrowUse << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; } } - if (NewDef != NarrowUse) { - DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse - << " replaced by " << *WideDef << "\n"); + if (NewDef != DU.NarrowUse) { + DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; - NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.push_back(NarrowUse); + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.push_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose it's uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers @@ -1004,29 +1029,32 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, } // Does this user itself evaluate to a recurrence after widening? - const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); + const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + if (!WideAddRec) { + WideAddRec = GetExtendedOperandRecurrence(DU); + } if (!WideAddRec) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(NarrowDefUse); - Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); - NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); return 0; } - // We assume that block terminators are not SCEVable. We wouldn't want to + // Assume block terminators cannot evaluate to a recurrence. We can't to // insert a Trunc after a terminator if there happens to be a critical edge. - assert(NarrowUse != NarrowUse->getParent()->getTerminator() && + assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && "SCEV is not expected to evaluate a block terminator"); // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. Instruction *WideUse = 0; - if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) { + if (WideAddRec == WideIncExpr + && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT)) WideUse = WideInc; - } else { - WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef); + WideUse = CloneIVUser(DU); if (!WideUse) return 0; } @@ -1051,13 +1079,13 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { for (Value::use_iterator UI = NarrowDef->use_begin(), UE = NarrowDef->use_end(); UI != UE; ++UI) { - Use &U = UI.getUse(); + Instruction *NarrowUse = cast<Instruction>(*UI); // Handle data flow merges and bizarre phi cycles. 
- if (!Widened.insert(cast<Instruction>(U.getUser()))) + if (!Widened.insert(NarrowUse)) continue; - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef)); + NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef)); } } @@ -1124,23 +1152,19 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { pushNarrowIVUsers(OrigPhi, WidePhi); while (!NarrowIVUsers.empty()) { - Use *UsePtr; - Instruction *WideDef; - tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); - Use &NarrowDefUse = *UsePtr; + NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); - Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); + Instruction *WideUse = WidenIVUse(DU); // Follow all def-use edges from the previous narrow use. if (WideUse) - pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse); + pushNarrowIVUsers(DU.NarrowUse, WideUse); // WidenIVUse may have removed the def-use edge. - if (NarrowDef->use_empty()) - DeadInsts.push_back(NarrowDef); + if (DU.NarrowDef->use_empty()) + DeadInsts.push_back(DU.NarrowDef); } return WidePhi; } @@ -1149,187 +1173,17 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// -void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { - unsigned IVOperIdx = 0; - ICmpInst::Predicate Pred = ICmp->getPredicate(); - if (IVOperand != ICmp->getOperand(0)) { - // Swapped - assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); - IVOperIdx = 1; - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); - const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // If the condition is always true or always false, replace it with - // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else - return; - - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - ++NumElimCmp; - Changed = true; - DeadInsts.push_back(ICmp); -} - -void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned) { - // We're only interested in the case where we know something about - // the numerator. - if (IVOperand != Rem->getOperand(0)) - return; - - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!IsSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). 
- const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); - if (IsSigned && !SE->isKnownNonNegative(LessOne)) - return; - - if (!SE->isKnownPredicate(IsSigned ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) - return; - - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1), - "tmp"); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); - } - - // Inform IVUsers about the new users. - if (IU) { - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I); - } - DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); - ++NumElimRem; - Changed = true; - DeadInsts.push_back(Rem); -} - -/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has -/// no observable side-effect given the range of IV values. -bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, - Instruction *IVOperand) { - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - EliminateIVComparison(ICmp, IVOperand); - return true; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - EliminateIVRemainder(Rem, IVOperand, IsSigned); - return true; - } - } - - // Eliminate any operation that SCEV can prove is an identity function. - if (!SE->isSCEVable(UseInst->getType()) || - (UseInst->getType() != IVOperand->getType()) || - (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) - return false; - - DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); - - UseInst->replaceAllUsesWith(IVOperand); - ++NumElimIdentity; - Changed = true; - DeadInsts.push_back(UseInst); - return true; -} - -/// pushIVUsers - Add all uses of Def to the current IV's worklist. -/// -static void pushIVUsers( - Instruction *Def, - SmallPtrSet<Instruction*,16> &Simplified, - SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { - - for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - - // Avoid infinite or exponential worklist processing. - // Also ensure unique worklist users. - // If Def is a LoopPhi, it may not be in the Simplified set, so check for - // self edges first. - if (User != Def && Simplified.insert(User)) - SimpleIVUsers.push_back(std::make_pair(User, Def)); - } -} - -/// isSimpleIVUser - Return true if this instruction generates a simple SCEV -/// expression in terms of that IV. -/// -/// This is similar to IVUsers' isInsteresting() but processes each instruction -/// non-recursively when the operand is already known to be a simpleIVUser. -/// -static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { - if (!SE->isSCEVable(I->getType())) - return false; - - // Get the symbolic expression for this instruction. - const SCEV *S = SE->getSCEV(I); - - // We assume that terminators are not SCEVable. - assert((!S || I != I->getParent()->getTerminator()) && - "can't fold terminators"); - - // Only consider affine recurrences. - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); - if (AR && AR->getLoop() == L) - return true; - - return false; -} -/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist -/// of IV users. 
Each successive simplification may push more users which may +/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV +/// users. Each successive simplification may push more users which may /// themselves be candidates for simplification. /// -/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it -/// simplifies instructions in-place during analysis. Rather than rewriting -/// induction variables bottom-up from their users, it transforms a chain of -/// IVUsers top-down, updating the IR only when it encouters a clear -/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still -/// needed, but only used to generate a new IV (phi) of wider type for sign/zero -/// extend elimination. +/// Sign/Zero extend elimination is interleaved with IV simplification. /// -/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. -/// -void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { - std::map<PHINode *, WideIVInfo> WideIVMap; +void IndVarSimplify::SimplifyAndExtend(Loop *L, + SCEVExpander &Rewriter, + LPPassManager &LPM) { + SmallVector<WideIVInfo, 8> WideIVs; SmallVector<PHINode*, 8> LoopPhis; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { @@ -1345,108 +1199,81 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { // extension. The first time SCEV attempts to normalize sign/zero extension, // the result becomes final. So for the most predictable results, we delay // evaluation of sign/zero extend evaluation until needed, and avoid running - // other SCEV based analysis prior to SimplifyIVUsersNoRewrite. + // other SCEV based analysis prior to SimplifyAndExtend. do { PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. - WideIVInfo WI; - - // Instructions processed by SimplifyIVUsers for CurrIV. - SmallPtrSet<Instruction*,16> Simplified; - - // Use-def pairs if IV users waiting to be processed for CurrIV. - SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; - - // Push users of the current LoopPhi. In rare cases, pushIVUsers may be - // called multiple times for the same LoopPhi. This is the proper thing to - // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + WideIVVisitor WIV(CurrIV, SE, TD); - while (!SimpleIVUsers.empty()) { - Instruction *UseInst, *Operand; - tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); - // Bypass back edges to avoid extra work. 
- if (UseInst == CurrIV) continue; + Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV); - if (EliminateIVUser(UseInst, Operand)) { - pushIVUsers(Operand, Simplified, SimpleIVUsers); - continue; - } - if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { - bool IsSigned = Cast->getOpcode() == Instruction::SExt; - if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { - CollectExtend(Cast, IsSigned, WI, SE, TD); - } - continue; - } - if (isSimpleIVUser(UseInst, L, SE)) { - pushIVUsers(UseInst, Simplified, SimpleIVUsers); - } - } - if (WI.WidestNativeType) { - WideIVMap[CurrIV] = WI; + if (WIV.WI.WidestNativeType) { + WideIVs.push_back(WIV.WI); } } while(!LoopPhis.empty()); - for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(), - E = WideIVMap.end(); I != E; ++I) { - WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts); + for (; !WideIVs.empty(); WideIVs.pop_back()) { + WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); } } - WideIVMap.clear(); } } -/// SimplifyCongruentIVs - Check for congruent phis in this loop header and -/// populate ExprToIVMap for use later. -/// -void IndVarSimplify::SimplifyCongruentIVs(Loop *L) { - DenseMap<const SCEV *, PHINode *> ExprToIVMap; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - PHINode *Phi = cast<PHINode>(I); - if (!SE->isSCEVable(Phi->getType())) - continue; +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// - const SCEV *S = SE->getSCEV(Phi); - DenseMap<const SCEV *, PHINode *>::const_iterator Pos; - bool Inserted; - tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi)); - if (Inserted) - continue; - PHINode *OrigPhi = Pos->second; - // Replacing the congruent phi is sufficient because acyclic redundancy - // elimination, CSE/GVN, should handle the rest. However, once SCEV proves - // that a phi is congruent, it's almost certain to be the head of an IV - // user cycle that is isomorphic with the original phi. So it's worth - // eagerly cleaning up the common case of a single IV increment. - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - Instruction *OrigInc = - cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); - Instruction *IsomorphicInc = - cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); - if (OrigInc != IsomorphicInc && - SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && - HoistStep(OrigInc, IsomorphicInc, DT)) { - DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " - << *IsomorphicInc << '\n'); - IsomorphicInc->replaceAllUsesWith(OrigInc); - DeadInsts.push_back(IsomorphicInc); - } +/// Check for expressions that ScalarEvolution generates to compute +/// BackedgeTakenInfo. If these expressions have not been reduced, then +/// expanding them may incur additional cost (albeit in the loop preheader). +static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + ScalarEvolution *SE) { + // If the backedge-taken count is a UDiv, it's very likely a UDiv that + // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a + // precise expression, rather than a UDiv from the user's code. If we can't + // find a UDiv in the code with some simple searching, assume the former and + // forego rewriting the loop. 
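// Illustrative sketch, not from the patch: the situation isHighCostExpansion
// guards against. For the stride-3 loop the backedge-taken count is a division
// (roughly (n + 2) / 3) that appears nowhere in the user's code, so expanding
// it for the exit-test rewrite would insert a udiv in the preheader; for the
// unit-stride loop the count is simply 'n' and is cheap to expand. Names are
// invented for the example.
void stride3(int *a, unsigned n) {
  for (unsigned i = 0; i < n; i += 3)
    a[i] = 0;
}
void stride1(int *a, unsigned n) {
  for (unsigned i = 0; i < n; ++i)
    a[i] = 0;
}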
+ if (isa<SCEVUDivExpr>(S)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return true; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != S) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != S) + return true; } - DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); - ++NumElimIV; - Phi->replaceAllUsesWith(OrigPhi); - DeadInsts.push_back(Phi); } -} -//===----------------------------------------------------------------------===// -// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. -//===----------------------------------------------------------------------===// + if (EnableIVRewrite) + return false; + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansion(*I, BI, SE)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV patter, assume its an expression + // produced by program code. + return false; +} /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction @@ -1465,31 +1292,17 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { if (!BI) return false; - // Special case: If the backedge-taken count is a UDiv, it's very likely a - // UDiv that ScalarEvolution produced in order to compute a precise - // expression, rather than a UDiv from the user's code. If we can't find a - // UDiv in the code with some simple searching, assume the former and forego - // rewriting the loop. - if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return false; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != BackedgeTakenCount) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != BackedgeTakenCount) - return false; - } - } + if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + return false; + return true; } /// getBackedgeIVType - Get the widest type used by the loop test after peeking /// through Truncs. /// -/// TODO: Unnecessary if LFTR does not force a canonical IV. -static const Type *getBackedgeIVType(Loop *L) { +/// TODO: Unnecessary when ForceLFTR is removed. +static Type *getBackedgeIVType(Loop *L) { if (!L->getExitingBlock()) return 0; @@ -1502,7 +1315,7 @@ static const Type *getBackedgeIVType(Loop *L) { if (!Cond) return 0; - const Type *Ty = 0; + Type *Ty = 0; for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); OI != OE; ++OI) { assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); @@ -1515,12 +1328,187 @@ static const Type *getBackedgeIVType(Loop *L) { return Ty; } +/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop +/// invariant value to the phi. 
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { + Instruction *IncI = dyn_cast<Instruction>(IncV); + if (!IncI) + return 0; + + switch (IncI->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + break; + case Instruction::GetElementPtr: + // An IV counter must preserve its type. + if (IncI->getNumOperands() == 2) + break; + default: + return 0; + } + + PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(1), L, DT)) + return Phi; + return 0; + } + if (IncI->getOpcode() == Instruction::GetElementPtr) + return 0; + + // Allow add/sub to be commuted. + Phi = dyn_cast<PHINode>(IncI->getOperand(1)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(0), L, DT)) + return Phi; + } + return 0; +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { + assert(L->getExitingBlock() && "expected loop exit"); + + BasicBlock *LatchBlock = L->getLoopLatch(); + // Don't bother with LFTR if the loop is not properly simplified. + if (!LatchBlock) + return false; + + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + assert(BI && "expected exit branch"); + + // Do LFTR to simplify the exit condition to an ICMP. + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return true; + + // Do LFTR to simplify the exit ICMP to EQ/NE + ICmpInst::Predicate Pred = Cond->getPredicate(); + if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) + return true; + + // Look for a loop invariant RHS + Value *LHS = Cond->getOperand(0); + Value *RHS = Cond->getOperand(1); + if (!isLoopInvariant(RHS, L, DT)) { + if (!isLoopInvariant(LHS, L, DT)) + return true; + std::swap(LHS, RHS); + } + // Look for a simple IV counter LHS + PHINode *Phi = dyn_cast<PHINode>(LHS); + if (!Phi) + Phi = getLoopPhiForCounter(LHS, L, DT); + + if (!Phi) + return true; + + // Do LFTR if the exit condition's IV is *not* a simple counter. + Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch()); + return Phi != getLoopPhiForCounter(IncV, L, DT); +} + +/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to +/// be rewritten) loop exit test. +static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + + for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != IncV) return false; + } + + for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != Phi) return false; + } + return true; +} + +/// FindLoopCounter - Find an affine IV in canonical form. +/// +/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount +/// +/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. +/// This is difficult in general for SCEV because of potential overflow. But we +/// could at least handle constant BECounts. +static PHINode * +FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) { + // I'm not sure how BECount could be a pointer type, but we definitely don't + // want to LFTR that. 
+ if (BECount->getType()->isPointerTy()) + return 0; + + uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); + + Value *Cond = + cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); + + // Loop over all of the PHI nodes, looking for a simple counter. + PHINode *BestPhi = 0; + const SCEV *BestInit = 0; + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "needsLFTR should guarantee a loop latch"); + + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); + if (!AR || AR->getLoop() != L || !AR->isAffine()) + continue; + + // AR may be a pointer type, while BECount is an integer type. + // AR may be wider than BECount. With eq/ne tests overflow is immaterial. + // AR may not be a narrower type, or we may never exit. + uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); + if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) + continue; + + const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); + if (!Step || !Step->isOne()) + continue; + + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + if (getLoopPhiForCounter(IncV, L, DT) != Phi) + continue; + + const SCEV *Init = AR->getStart(); + + if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { + // Don't force a live loop counter if another IV can be used. + if (AlmostDeadIV(Phi, LatchBlock, Cond)) + continue; + + // Prefer to count-from-zero. This is a more "canonical" counter form. It + // also prefers integer to pointer IVs. + if (BestInit->isZero() != Init->isZero()) { + if (BestInit->isZero()) + continue; + } + // If two IVs both count from zero or both count from nonzero then the + // narrower is likely a dead phi that has been widened. Use the wider phi + // to allow the other to be eliminated. + if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + continue; + } + BestPhi = Phi; + BestInit = Init; + } + return BestPhi; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: +Value *IndVarSimplify:: LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, @@ -1528,62 +1516,117 @@ LinearFunctionTestReplace(Loop *L, assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + // LFTR can ignore IV overflow and truncate to the width of + // BECount. This avoids materializing the add(zext(add)) expression. + Type *CntTy = !EnableIVRewrite ? + BackedgeTakenCount->getType() : IndVar->getType(); + + const SCEV *IVLimit = BackedgeTakenCount; + // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. 
// If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1)); + if (CntTy == IVLimit->getType()) + IVLimit = N; + else { + const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + IVLimit = SE->getTruncateOrZeroExtend(N, CntTy); + } else { + // Potential overflow. Cast before doing the add. + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1)); + } } - // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); CmpIndVar = IndVar; } + // For unit stride, IVLimit = Start + BECount with 2's complement overflow. + // So for, non-zero start compute the IVLimit here. + bool isPtrIV = false; + Type *CmpTy = CntTy; + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); + assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); + if (!AR->getStart()->isZero()) { + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); + const SCEV *IVInit = AR->getStart(); + + // For pointer types, sign extend BECount in order to materialize a GEP. + // Note that for without EnableIVRewrite, we never run SCEVExpander on a + // pointer type, because we must preserve the existing GEPs. Instead we + // directly generate a GEP later. + if (IVInit->getType()->isPointerTy()) { + isPtrIV = true; + CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); + IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); + } + // For integer types, truncate the IV before computing IVInit + BECount. + else { + if (SE->getTypeSizeInBits(IVInit->getType()) + > SE->getTypeSizeInBits(CmpTy)) + IVInit = SE->getTruncateExpr(IVInit, CmpTy); + + IVLimit = SE->getAddExpr(IVInit, IVLimit); + } + } // Expand the code for the iteration count. - assert(SE->isLoopInvariant(RHS, L) && + IRBuilder<> Builder(BI); + + assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); + + // Create a gep for IVInit + IVLimit from on an existing pointer base. 
+ assert(isPtrIV == IndVar->getType()->isPointerTy() && + "IndVar type must match IVInit type"); + if (isPtrIV) { + Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); + assert(SE->getSizeOfExpr( + cast<PointerType>(IVStart->getType())->getElementType())->isOne() + && "unit stride pointer IV must be i8*"); + + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); + Builder.SetInsertPoint(BI); + } // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; + ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; + P = ICmpInst::ICMP_NE; else - Opcode = ICmpInst::ICMP_EQ; + P = ICmpInst::ICMP_EQ; DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" << " LHS:" << *CmpIndVar << '\n' << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *ExitCnt << "\n" + << " Expr:\t" << *IVLimit << "\n"); + + if (SE->getTypeSizeInBits(CmpIndVar->getType()) + > SE->getTypeSizeInBits(CmpTy)) { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv"); + } - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); - Cond->setDebugLoc(BI->getDebugLoc()); + Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old // comparison, but that's not immediately safe, since users of the old @@ -1612,7 +1655,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) return; - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); + Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); BasicBlock::iterator I = Preheader->getTerminator(); while (I != Preheader->begin()) { --I; @@ -1633,6 +1676,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { if (isa<DbgInfoIntrinsic>(I)) continue; + // Skip landingpad instructions. + if (isa<LandingPadInst>(I)) + continue; + // Don't sink static AllocaInsts out of the entry block, which would // turn them into dynamic allocas! if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) @@ -1699,7 +1746,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - if (!DisableIVRewrite) + if (EnableIVRewrite) IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); @@ -1717,6 +1764,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif // Eliminate redundant IV users. // @@ -1724,9 +1774,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // attempt to avoid evaluating SCEVs for sign/zero extend operations until // other expressions involving loop IVs have been evaluated. This helps SCEV // set no-wrap flags before normalizing sign/zero extension. - if (DisableIVRewrite) { + if (!EnableIVRewrite) { Rewriter.disableCanonicalMode(); - SimplifyIVUsersNoRewrite(L, Rewriter); + SimplifyAndExtend(L, Rewriter, LPM); } // Check to see if this loop has a computable loop-invariant execution count. 
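// Illustrative sketch, not from the patch: the shape of the rewrite performed
// by LinearFunctionTestReplace. The first loop exits on a signed '<' against a
// loop-invariant bound; after LFTR the loop exits on an equality test against
// the trip count expanded before the loop, which is the canonical form later
// passes expect. Names are invented; both functions behave identically.
void lftr_before(int *a, int n) {
  for (int i = 0; i < n; ++i)
    a[i] = 0;
}
void lftr_after(int *a, int n) {
  int limit = n > 0 ? n : 0;          // trip count, expanded before the loop
  for (int i = 0; i != limit; ++i)    // canonical != exit condition
    a[i] = 0;
}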
@@ -1739,25 +1789,25 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV users. - if (!DisableIVRewrite) - SimplifyIVUsers(Rewriter); + if (EnableIVRewrite) + Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts); // Eliminate redundant IV cycles. - if (DisableIVRewrite) - SimplifyCongruentIVs(L); + if (!EnableIVRewrite) + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. - const Type *LargestType = 0; + Type *LargestType = 0; bool NeedCannIV = false; bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); - if (ExpandBECount) { + if (EnableIVRewrite && ExpandBECount) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. NeedCannIV = true; - const Type *Ty = BackedgeTakenCount->getType(); - if (DisableIVRewrite) { + Type *Ty = BackedgeTakenCount->getType(); + if (!EnableIVRewrite) { // In this mode, SimplifyIVUsers may have already widened the IV used by // the backedge test and inserted a Trunc on the compare's operand. Get // the wider type to avoid creating a redundant narrow IV only used by the @@ -1769,10 +1819,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE->getTypeSizeInBits(LargestType)) LargestType = SE->getEffectiveSCEVType(Ty); } - if (!DisableIVRewrite) { + if (EnableIVRewrite) { for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { NeedCannIV = true; - const Type *Ty = + Type *Ty = SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > @@ -1811,18 +1861,16 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // the end of the pass. while (!OldCannIVs.empty()) { PHINode *OldCannIV = OldCannIVs.pop_back_val(); - OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); + OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt()); } } - + else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) { + IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - ICmpInst *NewICmp = 0; - if (ExpandBECount) { - assert(canExpandBackedgeTakenCount(L, SE) && - "canonical IV disrupted BackedgeTaken expansion"); - assert(NeedCannIV && - "LinearFunctionTestReplace requires a canonical induction variable"); + Value *NewICmp = 0; + if (ExpandBECount && IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead any // pass that uses the SCEVExpander must do it. This does not work well for @@ -1837,7 +1885,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } // Rewrite IV-derived expressions. - if (!DisableIVRewrite) + if (EnableIVRewrite) RewriteIVExpressions(L, Rewriter); // Clear the rewriter cache, because values that are in the rewriter's cache @@ -1860,12 +1908,34 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. 
- if (NewICmp && IU) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); - + if (IU && NewICmp) { + ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp); + if (NewICmpInst) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0))); + } // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. - assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); + assert(L->isLCSSAForm(*DT) && + "Indvars did not leave the loop in lcssa form!"); + + // Verify that LFTR, and any other change have not interfered with SCEV's + // ability to compute trip count. +#ifndef NDEBUG + if (!EnableIVRewrite && VerifyIndvars && + !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + SE->forgetLoop(L); + const SCEV *NewBECount = SE->getBackedgeTakenCount(L); + if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < + SE->getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE->getTruncateOrNoop(NewBECount, + BackedgeTakenCount->getType()); + else + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, + NewBECount->getType()); + assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); + } +#endif + return Changed; } diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index b500d5b..f410af3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -811,8 +811,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { /// important optimization that encourages jump threading, and needs to be run /// interlaced with other jump threading tasks. bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { - // Don't hack volatile loads. - if (LI->isVolatile()) return false; + // Don't hack volatile/atomic loads. + if (!LI->isSimple()) return false; // If the load is defined in a block with exactly one predecessor, it can't be // partially redundant. diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 66add6c..b79bb13 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -151,6 +151,11 @@ namespace { /// bool isSafeToExecuteUnconditionally(Instruction &I); + /// isGuaranteedToExecute - Check that the instruction is guaranteed to + /// execute. + /// + bool isGuaranteedToExecute(Instruction &I); + /// pointerInvalidatedByLoop - Return true if the body of this loop may /// store into the memory location pointed to by V. /// @@ -357,8 +362,8 @@ void LICM::HoistRegion(DomTreeNode *N) { bool LICM::canSinkOrHoistInst(Instruction &I) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - if (LI->isVolatile()) - return false; // Don't hoist volatile loads! + if (!LI->isUnordered()) + return false; // Don't hoist volatile/atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. @@ -461,7 +466,7 @@ void LICM::sink(Instruction &I) { } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. - I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + I.moveBefore(ExitBlocks[0]->getFirstInsertionPt()); // This instruction is no longer in the AST for the current loop, because // we just sunk it out of the loop. 
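The new VerifyIndvars block recomputes the backedge-taken count after the pass and compares it against the original, truncating whichever expression is wider so the two are checked at a common width. A standalone sketch of that comparison (a hypothetical helper over plain integers, standing in for the SCEV expressions):

#include <cstdint>
#include <cassert>

// Illustrative: two trip counts computed before and after the pass may be
// expressed at different bit widths, so compare them at the narrower width.
static bool sameBackedgeCount(uint64_t oldCount, unsigned oldBits,
                              uint64_t newCount, unsigned newBits) {
  unsigned bits = oldBits < newBits ? oldBits : newBits;
  uint64_t mask = bits >= 64 ? ~0ULL : ((1ULL << bits) - 1);
  return (oldCount & mask) == (newCount & mask);
}

int main() {
  // A 32-bit count re-derived later as a 64-bit expression still matches.
  assert(sameBackedgeCount(41, 32, 41, 64));
  return 0;
}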
If we just sunk it into an outer @@ -504,7 +509,7 @@ void LICM::sink(Instruction &I) { continue; // Insert the code after the last PHI node. - BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert @@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { if (Inst.isSafeToSpeculativelyExecute()) return true; + return isGuaranteedToExecute(Inst); +} + +bool LICM::isGuaranteedToExecute(Instruction &Inst) { // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. @@ -635,7 +644,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -713,34 +722,41 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // If there is an non-load/store instruction in the loop, we can't promote // it. - unsigned InstAlignment; if (LoadInst *load = dyn_cast<LoadInst>(Use)) { - assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); - InstAlignment = load->getAlignment(); + assert(!load->isVolatile() && "AST broken"); + if (!load->isSimple()) + return; } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (Use->getOperand(1) != ASIV) continue; - InstAlignment = store->getAlignment(); - assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); + assert(!store->isVolatile() && "AST broken"); + if (!store->isSimple()) + return; + + // Note that we only check GuaranteedToExecute inside the store case + // so that we do not introduce stores where they did not exist before + // (which would break the LLVM concurrency model). + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + unsigned InstAlignment = store->getAlignment(); + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isGuaranteedToExecute(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + + if (!GuaranteedToExecute) + GuaranteedToExecute = isGuaranteedToExecute(*Use); + } else return; // Not a load or store. - // If the alignment of this instruction allows us to specify a more - // restrictive (and performant) alignment and if we are sure this - // instruction will be executed, update the alignment. - // Larger is better, with the exception of 0 being the best alignment. 
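The relocated alignment logic in PromoteAliasSet follows the rule spelled out in its comment: only stores that are guaranteed to execute may raise the promoted alignment, larger values win, and zero means the best (ABI) alignment. A small standalone sketch of that merge rule, using a hypothetical helper name:

#include <cassert>

// Illustrative sketch of the alignment-merging rule: keep the best alignment
// seen among stores that are guaranteed to execute. "Larger is better, with
// the exception of 0 being the best alignment."
static unsigned mergeAlignment(unsigned current, unsigned storeAlign,
                               bool guaranteedToExecute) {
  if (!guaranteedToExecute)
    return current;
  if (current != 0 && (storeAlign > current || storeAlign == 0))
    return storeAlign;
  return current;
}

int main() {
  assert(mergeAlignment(4, 16, true) == 16);   // stricter alignment wins
  assert(mergeAlignment(4, 16, false) == 4);   // not guaranteed: keep old value
  assert(mergeAlignment(4, 0, true) == 0);     // 0 (ABI alignment) is best
  return 0;
}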
- if ((InstAlignment > Alignment || InstAlignment == 0) - && (Alignment != 0)) - if (isSafeToExecuteUnconditionally(*Use)) { - GuaranteedToExecute = true; - Alignment = InstAlignment; - } - - if (!GuaranteedToExecute) - GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - LoopUses.push_back(Use); } } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a0e41d9..ad15cbb 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -267,7 +267,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, /// processLoopStore - See if this store can be promoted to a memset or memcpy. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { - if (SI->isVolatile()) return false; + if (!SI->isSimple()) return false; Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); @@ -314,7 +314,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && - StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile()) + StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) return true; } @@ -463,7 +463,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, SplatValue = 0; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't - // do anything with a 3-byte store, for example. + // do anything with a 3-byte store. return false; } @@ -498,7 +498,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), @@ -604,7 +604,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
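As the comments in processLoopStridedStore and processLoopStoreOfLoopLoad note, the number of stored bytes is (BECount + 1) * StoreSize, with BECount first widened to pointer width so the arithmetic cannot wrap in a narrow type. A standalone arithmetic sketch (illustrative names):

#include <cstdint>
#include <cassert>

// Illustrative: bytes written by a loop that stores StoreSize bytes per
// iteration, where BECount is the backedge-taken count (iterations - 1).
// BECount is widened to pointer width before the add and multiply.
static uint64_t numStoredBytes(uint32_t backedgeTakenCount, uint64_t storeSize) {
  uint64_t tripCount = static_cast<uint64_t>(backedgeTakenCount) + 1;
  return tripCount * storeSize;
}

int main() {
  // A loop of 1024 iterations storing 4 bytes each writes 4096 bytes.
  assert(numStoredBytes(1023, 4) == 4096);
  return 0;
}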
- const Type *IntPtr = TD->getIntPtrType(SI->getContext()); + Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 509d026..3e122c2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -70,12 +70,27 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; +namespace llvm { +cl::opt<bool> EnableNested( + "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); + +cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); + +// Temporary flag to cleanup congruent phis after LSR phi expansion. +// It's currently disabled until we can determine whether it's truly useful or +// not. The flag should be removed after the v3.0 release. +cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); +} + namespace { /// RegSortData - This class holds data which is used to order reuse candidates. @@ -219,7 +234,7 @@ struct Formula { void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); unsigned getNumRegs() const; - const Type *getType() const; + Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -319,7 +334,7 @@ unsigned Formula::getNumRegs() const { /// getType - Return the type of this formula, if it has one, or null /// otherwise. This type is meaningless except for the bit size. -const Type *Formula::getType() const { +Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : AM.BaseGV ? AM.BaseGV->getType() : @@ -397,7 +412,7 @@ void Formula::dump() const { /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -405,7 +420,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { /// isAddSExtable - Return true if the given add can be sign-extended /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -413,7 +428,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { /// isMulSExtable - Return true if the given mul can be sign-extended /// without changing its value. 
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy)); @@ -594,8 +609,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { } /// getAccessType - Return the type of the memory being accessed. -static const Type *getAccessType(const Instruction *Inst) { - const Type *AccessTy = Inst->getType(); +static Type *getAccessType(const Instruction *Inst) { + Type *AccessTy = Inst->getType(); if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) AccessTy = SI->getOperand(0)->getType(); else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -614,7 +629,7 @@ static const Type *getAccessType(const Instruction *Inst) { // All pointers have the same requirements, so canonicalize them to an // arbitrary pointer type to minimize variation. - if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + if (PointerType *PTy = dyn_cast<PointerType>(AccessTy)) AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()); @@ -670,6 +685,21 @@ public: void Loose(); +#ifndef NDEBUG + // Once any of the metrics loses, they must all remain losers. + bool isValid() { + return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds + | ImmCost | SetupCost) != ~0u) + || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds + & ImmCost & SetupCost) == ~0u); + } +#endif + + bool isLoser() { + assert(isValid() && "invalid cost"); + return NumRegs == ~0u; + } + void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 16> &Regs, const DenseSet<const SCEV *> &VisitedRegs, @@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg, if (AR->getLoop() == L) AddRecCost += 1; /// TODO: This should be a function of the stride. - // If this is an addrec for a loop that's already been visited by LSR, - // don't second-guess its addrec phi nodes. LSR isn't currently smart - // enough to reason about more than one loop at a time. Consider these - // registers free and leave them alone. - else if (L->contains(AR->getLoop()) || + // If this is an addrec for another loop, don't second-guess its addrec phi + // nodes. LSR isn't currently smart enough to reason about more than one + // loop at a time. LSR has either already run on inner loops, will not run + // on other loops, and cannot be expected to change sibling loops. If the + // AddRec exists, consider it's register free and leave it alone. Otherwise, + // do not consider this formula at all. + // FIXME: why do we need to generate such fomulae? + else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (SE.isSCEVable(PN->getType()) && (SE.getEffectiveSCEVType(PN->getType()) == SE.getEffectiveSCEVType(AR->getType())) && SE.getSCEV(PN) == AR) return; - + } + if (!EnableNested) { + Loose(); + return; + } // If this isn't one of the addrecs that the loop already has, it // would require a costly new phi and add. TODO: This isn't // precisely modeled right now. ++NumBaseAdds; - if (!Regs.count(AR->getStart())) + if (!Regs.count(AR->getStart())) { RateRegister(AR->getStart(), Regs, L, SE, DT); + if (isLoser()) + return; + } } // Add the step value register, if it needs one. 
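The new Cost::isValid/isLoser pair encodes the invariant that once any metric is set to the losing value ~0u, every metric must be ~0u, so isLoser only has to test a single field. The bitwise trick can be reproduced standalone (field names abbreviated; this is not the full Cost class):

#include <cassert>

// Illustrative copy of the invariant: OR-ing the fields differs from ~0u only
// if none of them is saturated; AND-ing them equals ~0u only if all are.
struct CostSketch {
  unsigned NumRegs = 0, AddRecCost = 0, ImmCost = 0, SetupCost = 0;

  void lose() { NumRegs = AddRecCost = ImmCost = SetupCost = ~0u; }

  bool isValid() const {
    return ((NumRegs | AddRecCost | ImmCost | SetupCost) != ~0u) ||
           ((NumRegs & AddRecCost & ImmCost & SetupCost) == ~0u);
  }
  bool isLoser() const { return NumRegs == ~0u; }
};

int main() {
  CostSketch c;
  assert(c.isValid() && !c.isLoser());
  c.lose();
  assert(c.isValid() && c.isLoser());
  c.ImmCost = 3;                 // partially reset: invariant broken
  assert(!c.isValid());
  return 0;
}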
// TODO: The non-affine case isn't precisely modeled here. - if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) - if (!Regs.count(AR->getStart())) + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) { + if (!Regs.count(AR->getOperand(1))) { RateRegister(AR->getOperand(1), Regs, L, SE, DT); + if (isLoser()) + return; + } + } } ++NumRegs; @@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + if (isLoser()) + return; } for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { @@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + if (isLoser()) + return; } // Determine how many (unfolded) adds we'll need inside the loop. @@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F, else if (Offset != 0) ImmCost += APInt(64, Offset, true).getMinSignedBits(); } + assert(isValid() && "invalid cost"); } /// Loose - Set this cost to a losing value. @@ -980,7 +1029,7 @@ public: }; KindType Kind; - const Type *AccessTy; + Type *AccessTy; SmallVector<int64_t, 8> Offsets; int64_t MinOffset; @@ -995,7 +1044,7 @@ public: /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different /// max fixup widths to be equivalent, because the narrower one may be relying /// on the implicit truncation to truncate away bogus bits. - const Type *WidestFixupType; + Type *WidestFixupType; /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and @@ -1005,7 +1054,7 @@ public: /// Regs - The set of register candidates used by all formulae in this LSRUse. SmallPtrSet<const SCEV *, 4> Regs; - LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), + LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), @@ -1127,7 +1176,7 @@ void LSRUse::dump() const { /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. static bool isLegalUse(const TargetLowering::AddrMode &AM, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { switch (Kind) { case LSRUse::Address: @@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs); return false; } @@ -1176,7 +1225,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, static bool isLegalUse(TargetLowering::AddrMode AM, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != @@ -1198,7 +1247,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM, static bool isAlwaysFoldable(int64_t BaseOffs, GlobalValue *BaseGV, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Fast-path: zero is always foldable. 
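The change from -AM.BaseOffs to -(uint64_t)AM.BaseOffs in the icmp-immediate check sidesteps signed-overflow undefined behaviour when BaseOffs happens to be INT64_MIN; negating through the unsigned type is always well defined. A small standalone illustration (the helper name is made up):

#include <cstdint>
#include <cassert>

// Illustrative: negating INT64_MIN directly is undefined behaviour in C++,
// but negating through uint64_t wraps modulo 2^64 and is well defined; on
// two's-complement targets the result converts back to INT64_MIN.
static int64_t negateOffset(int64_t baseOffs) {
  return static_cast<int64_t>(-static_cast<uint64_t>(baseOffs));
}

int main() {
  assert(negateOffset(42) == -42);
  // INT64_MIN has no positive counterpart in int64_t; the unsigned negation
  // simply yields the same bit pattern instead of overflowing.
  assert(negateOffset(INT64_MIN) == INT64_MIN);
  return 0;
}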
if (BaseOffs == 0 && !BaseGV) return true; @@ -1224,7 +1273,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs, static bool isAlwaysFoldable(const SCEV *S, int64_t MinOffset, int64_t MaxOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI, ScalarEvolution &SE) { // Fast-path: zero is always foldable. @@ -1299,7 +1348,7 @@ class LSRInstance { SmallSetVector<int64_t, 8> Factors; /// Types - Interesting use types, to facilitate truncation reuse. - SmallSetVector<const Type *, 4> Types; + SmallSetVector<Type *, 4> Types; /// Fixups - The list of operands which are to be replaced. SmallVector<LSRFixup, 16> Fixups; @@ -1330,11 +1379,11 @@ class LSRInstance { UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy); + LSRUse::KindType Kind, Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, - const Type *AccessTy); + Type *AccessTy); void DeleteUse(LSRUse &LU, size_t LUIdx); @@ -1426,7 +1475,8 @@ void LSRInstance::OptimizeShadowIV() { IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; + Type *DestTy = NULL; + bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV to eliminate this cast. @@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() { for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) { + IsSigned = false; DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) + } + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) { + IsSigned = true; DestTy = SCast->getDestTy(); + } if (!DestTy) continue; if (TLI) { @@ -1457,7 +1511,7 @@ void LSRInstance::OptimizeShadowIV() { if (!PH) continue; if (PH->getNumIncomingValues() != 2) continue; - const Type *SrcTy = PH->getType(); + Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); if (Mantissa == -1) continue; if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) @@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? + (double)Init->getSExtValue() : + (double)Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -1776,7 +1832,7 @@ LSRInstance::OptimizeLoopTermCond() { if (!TLI) goto decline_post_inc; // Check for possible scaled-address reuse. - const Type *AccessTy = getAccessType(UI->getUser()); + Type *AccessTy = getAccessType(UI->getUser()); TargetLowering::AddrMode AM; AM.Scale = C->getSExtValue(); if (TLI->isLegalAddressingMode(AM, AccessTy)) @@ -1840,10 +1896,10 @@ LSRInstance::OptimizeLoopTermCond() { /// return true. bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; - const Type *NewAccessTy = AccessTy; + Type *NewAccessTy = AccessTy; // Check for a mismatched kind. 
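OptimizeShadowIV now records whether the float cast was sitofp or uitofp and converts the integer start value accordingly (getSExtValue versus getZExtValue) before building the ConstantFP. The distinction only matters for negative start values, as this standalone example shows (names are illustrative):

#include <cstdint>
#include <cassert>

// Illustrative: the same 64-bit pattern converts to very different doubles
// depending on whether it is treated as signed (sitofp) or unsigned (uitofp).
static double initAsDouble(uint64_t bits, bool isSigned) {
  return isSigned ? static_cast<double>(static_cast<int64_t>(bits))
                  : static_cast<double>(bits);
}

int main() {
  uint64_t init = static_cast<uint64_t>(-8);      // an i64 start value of -8
  assert(initAsDouble(init, true) == -8.0);       // signed view
  assert(initAsDouble(init, false) > 1.8e19);     // unsigned view: huge value
  return 0;
}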
It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -1882,7 +1938,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, /// Either reuse an existing use or create a new one, as needed. std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); @@ -2044,7 +2100,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - const Type *AccessTy = 0; + Type *AccessTy = 0; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2464,7 +2520,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, if (LU.Kind != LSRUse::ICmpZero) return; // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; if (SE.getTypeSizeInBits(IntTy) > 64) return; @@ -2538,7 +2594,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, /// scaled-offset address modes, for example. void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. @@ -2598,13 +2654,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { if (Base.AM.BaseGV) return; // Determine the integer type for the base formula. - const Type *DstTy = Base.getType(); + Type *DstTy = Base.getType(); if (!DstTy) return; DstTy = SE.getEffectiveSCEVType(DstTy); - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { - const Type *SrcTy = *I; + Type *SrcTy = *I; if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { Formula F = Base; @@ -2741,7 +2797,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { int64_t Imm = WI.Imm; const SCEV *OrigReg = WI.OrigReg; - const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); + Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); unsigned BitWidth = SE.getTypeSizeInBits(IntTy); @@ -3275,6 +3331,9 @@ retry: skip:; } + if (!EnableRetry && !AnySatisfiedReqRegs) + return; + // If none of the formulae had all of the required registers, relax the // constraint so that we don't exclude all formulae. if (!AnySatisfiedReqRegs) { @@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { // SolveRecurse does all the work. SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); + if (Solution.empty()) { + DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); + return; + } // Ok, we've now made all our decisions. DEBUG(dbgs() << "\n" @@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, // Don't insert instructions before PHI nodes. while (isa<PHINode>(IP)) ++IP; + // Ignore landingpad instructions. + while (isa<LandingPadInst>(IP)) ++IP; + // Ignore debug intrinsics. 
while (isa<DbgInfoIntrinsic>(IP)) ++IP; @@ -3440,9 +3506,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); // This will be the type that we'll initially expand to. - const Type *Ty = F.getType(); + Type *Ty = F.getType(); if (!Ty) // No type known; just expand directly to the ultimate type. Ty = OpTy; @@ -3450,7 +3516,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand directly to the ultimate type if it's the right size. Ty = OpTy; // This is the type to do integer arithmetic in. - const Type *IntTy = SE.getEffectiveSCEVType(Ty); + Type *IntTy = SE.getEffectiveSCEVType(Ty); // Build up a list of operands to add together to form the full base. SmallVector<const SCEV *, 8> Ops; @@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // The other interesting way of "folding" with an ICmpZero is to use a // negated immediate. if (!ICmpScaledV) - ICmpScaledV = ConstantInt::get(IntTy, -Offset); + ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); else { Ops.push_back(SE.getUnknown(ICmpScaledV)); ICmpScaledV = ConstantInt::get(IntTy, Offset); @@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN, // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BB->getTerminator())) { - Loop *PNLoop = LI.getLoopFor(PN->getParent()); - if (!PNLoop || PN->getParent() != PNLoop->getHeader()) { + BasicBlock *Parent = PN->getParent(); + Loop *PNLoop = LI.getLoopFor(Parent); + if (!PNLoop || Parent != PNLoop->getHeader()) { // Split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + BasicBlock *NewBB = 0; + if (!Parent->isLandingPad()) { + NewBB = SplitCriticalEdge(BB, Parent, P, + /*MergeIdenticalEdges=*/true, + /*DontDeleteUselessPhis=*/true); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs); + NewBB = NewBBs[0]; + } // If PN is outside of the loop and BB is in the loop, we want to // move the block to be immediately before the PHI block, not @@ -3637,7 +3713,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) FullV = CastInst::Create(CastInst::getCastOpcode(FullV, false, @@ -3667,7 +3743,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF, Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), @@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. @@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) OptimizeShadowIV(); OptimizeLoopTermCond(); + // If loop preparation eliminates all interesting IV users, bail. 
+ if (IU.empty()) return; + + // Skip nested loops until we can model them better with formulae. + if (!EnableNested && !L->empty()) { + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. + SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } + DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); + return; + } + // Start collecting data and preparing for the solver. CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); @@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) Types.clear(); RegUses.clear(); + if (Solution.empty()) + return; + #ifndef NDEBUG // Formulae should be legal. for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), @@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // Now that we've decided what we want, make it so. ImplementSolution(Solution, P); + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. + SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } } void LSRInstance::print_factors_and_types(raw_ostream &OS) const { @@ -3793,7 +3898,7 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const { OS << '*' << *I; } - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { if (!First) OS << ", "; First = false; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index fef6bc3..91395b2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Target/TargetData.h" #include <climits> using namespace llvm; @@ -39,6 +40,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); +// Temporary flag to be removed in 3.0 +static cl::opt<bool> +NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, + cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); + namespace { class LoopUnroll : public LoopPass { public: @@ -49,7 +55,7 @@ namespace { CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -57,11 +63,11 @@ namespace { /// that the loop unroll should be performed regardless of how much /// code expansion would result. static const unsigned NoThreshold = UINT_MAX; - + // Threshold to use when optsize is specified (and there is no // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; - + unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; @@ -79,6 +85,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. 
And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next @@ -101,45 +108,62 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { } /// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { +static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, + const TargetData *TD) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I, TD); NumCalls = Metrics.NumInlineCandidates; - + unsigned LoopSize = Metrics.NumInsts; - + // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's // not a problem for code quality. if (LoopSize == 0) LoopSize = 1; - + return LoopSize; } bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); (void)Header; - + // Determine the current unrolling threshold. While this is normally set // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. unsigned Threshold = CurrentThreshold; - if (!UserThreshold && + if (!UserThreshold && Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) Threshold = OptSizeUnrollThreshold; - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = CurrentCount; - + // Find trip count and trip multiple if count is not available + unsigned TripCount = 0; + unsigned TripMultiple = 1; + if (!NoSCEVUnroll) { + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); + } + } + else { + TripCount = L->getSmallConstantTripCount(); + if (TripCount == 0) + TripMultiple = L->getSmallConstantTripMultiple(); + } // Automatically select an unroll count. + unsigned Count = CurrentCount; if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise @@ -152,8 +176,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (Threshold != NoThreshold) { + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); unsigned NumInlineCandidates; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); @@ -182,12 +207,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - Function *F = L->getHeader()->getParent(); - if (!UnrollLoop(L, Count, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) return false; - // FIXME: Reconstruct dom info, because it is not preserved properly. 
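With the SCEV-based path, the unroller works from a latch trip count plus a trip multiple rather than a single constant. The trip multiple is what permits unrolling without a remainder loop when only a divisibility fact is known. A conceptual, standalone check (this is not the UnrollLoop implementation, just the idea):

#include <cassert>

// Illustrative: when the exact trip count is unknown but is known to be a
// multiple of tripMultiple, unrolling by a factor that divides tripMultiple
// needs no remainder ("epilogue") iterations.
static bool needsEpilogue(unsigned tripCount, unsigned tripMultiple,
                          unsigned unrollCount) {
  if (tripCount != 0)                       // exact count known
    return tripCount % unrollCount != 0;
  return tripMultiple % unrollCount != 0;   // only the multiple is known
}

int main() {
  assert(!needsEpilogue(12, 1, 4));  // 12 iterations, unroll by 4: clean
  assert( needsEpilogue(10, 1, 4));  // 10 iterations, unroll by 4: remainder
  assert(!needsEpilogue(0, 8, 4));   // unknown count, multiple of 8: clean
  return 0;
}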
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) - DT->runOnFunction(*F); return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 840c4b6..458949c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -492,7 +492,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, Value *BranchVal = LIC; if (!isa<ConstantInt>(Val) || Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp"); + BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); else if (Val != ConstantInt::getTrue(Val->getContext())) // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); @@ -561,10 +561,17 @@ void LoopUnswitch::SplitExitEdges(Loop *L, BasicBlock *ExitBlock = ExitBlocks[i]; SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); + // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. - SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), - ".us-lcssa", this); + if (!ExitBlock->isLandingPad()) { + SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), + ".us-lcssa", this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", + this, NewBBs); + } } } @@ -632,7 +639,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // as well. ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); } - + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. @@ -653,6 +660,19 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } + + if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { + PN = PHINode::Create(LPad->getType(), 0, "", + ExitSucc->getFirstInsertionPt()); + + for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); + I != E; ++I) { + BasicBlock *BB = *I; + LandingPadInst *LPI = BB->getLandingPadInst(); + LPI->replaceAllUsesWith(PN); + PN->addIncoming(LPI, BB); + } + } } // Rewrite the code to refer to itself. 
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp index 9087b46..689bbe9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp @@ -20,98 +20,88 @@ #include "llvm/Support/IRBuilder.h" using namespace llvm; -static bool LowerAtomicIntrinsic(IntrinsicInst *II) { - IRBuilder<> Builder(II->getParent(), II); - unsigned IID = II->getIntrinsicID(); - switch (IID) { - case Intrinsic::memory_barrier: - break; +static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { + IRBuilder<> Builder(CXI->getParent(), CXI); + Value *Ptr = CXI->getPointerOperand(); + Value *Cmp = CXI->getCompareOperand(); + Value *Val = CXI->getNewValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CXI->replaceAllUsesWith(Orig); + CXI->eraseFromParent(); + return true; +} - case Intrinsic::atomic_load_add: - case Intrinsic::atomic_load_sub: - case Intrinsic::atomic_load_and: - case Intrinsic::atomic_load_nand: - case Intrinsic::atomic_load_or: - case Intrinsic::atomic_load_xor: - case Intrinsic::atomic_load_max: - case Intrinsic::atomic_load_min: - case Intrinsic::atomic_load_umax: - case Intrinsic::atomic_load_umin: { - Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1); +static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { + IRBuilder<> Builder(RMWI->getParent(), RMWI); + Value *Ptr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); - LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Res = NULL; - switch (IID) { - default: assert(0 && "Unrecognized atomic modify operation"); - case Intrinsic::atomic_load_add: - Res = Builder.CreateAdd(Orig, Delta); - break; - case Intrinsic::atomic_load_sub: - Res = Builder.CreateSub(Orig, Delta); - break; - case Intrinsic::atomic_load_and: - Res = Builder.CreateAnd(Orig, Delta); - break; - case Intrinsic::atomic_load_nand: - Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); - break; - case Intrinsic::atomic_load_or: - Res = Builder.CreateOr(Orig, Delta); - break; - case Intrinsic::atomic_load_xor: - Res = Builder.CreateXor(Orig, Delta); - break; - case Intrinsic::atomic_load_max: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Delta, Orig); - break; - case Intrinsic::atomic_load_min: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Orig, Delta); - break; - case Intrinsic::atomic_load_umax: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Delta, Orig); - break; - case Intrinsic::atomic_load_umin: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Orig, Delta); - break; - } - Builder.CreateStore(Res, Ptr); + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; - II->replaceAllUsesWith(Orig); + switch (RMWI->getOperation()) { + default: llvm_unreachable("Unexpected RMW operation"); + case AtomicRMWInst::Xchg: + Res = Val; break; - } - - case Intrinsic::atomic_swap: { - Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1); - LoadInst *Orig = Builder.CreateLoad(Ptr); - Builder.CreateStore(Val, Ptr); - II->replaceAllUsesWith(Orig); + case AtomicRMWInst::Add: + Res = Builder.CreateAdd(Orig, Val); break; - } - - case Intrinsic::atomic_cmp_swap: { - Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1); - Value *Val = II->getArgOperand(2); - - LoadInst *Orig = 
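LowerAtomicCmpXchgInst above turns the cmpxchg into a plain load, icmp eq, select, and store, and replaces uses with the original loaded value; this is only sound because the pass runs where atomicity is known to be unnecessary. The single-threaded semantics, written out as a standalone C++ sketch:

#include <cassert>

// Illustrative single-threaded equivalent of the lowered cmpxchg: load the
// old value, store the new value only if the old value equals the expected
// one, and yield the old value. No atomicity is provided, matching the
// premise of the LowerAtomic pass.
static int loweredCmpXchg(int *ptr, int expected, int newVal) {
  int orig = *ptr;                               // LoadInst
  int res = (orig == expected) ? newVal : orig;  // ICmpEQ + Select
  *ptr = res;                                    // StoreInst
  return orig;                                   // cmpxchg yields the old value
}

int main() {
  int x = 5;
  assert(loweredCmpXchg(&x, 5, 9) == 5 && x == 9);  // success case
  assert(loweredCmpXchg(&x, 5, 1) == 9 && x == 9);  // failure leaves x alone
  return 0;
}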
Builder.CreateLoad(Ptr); - Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); - Value *Res = Builder.CreateSelect(Equal, Val, Orig); - Builder.CreateStore(Res, Ptr); - II->replaceAllUsesWith(Orig); + case AtomicRMWInst::Sub: + Res = Builder.CreateSub(Orig, Val); + break; + case AtomicRMWInst::And: + Res = Builder.CreateAnd(Orig, Val); + break; + case AtomicRMWInst::Nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val)); + break; + case AtomicRMWInst::Or: + Res = Builder.CreateOr(Orig, Val); + break; + case AtomicRMWInst::Xor: + Res = Builder.CreateXor(Orig, Val); + break; + case AtomicRMWInst::Max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val), + Val, Orig); + break; + case AtomicRMWInst::Min: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val), + Orig, Val); + break; + case AtomicRMWInst::UMax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val), + Val, Orig); + break; + case AtomicRMWInst::UMin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val), + Orig, Val); break; } + Builder.CreateStore(Res, Ptr); + RMWI->replaceAllUsesWith(Orig); + RMWI->eraseFromParent(); + return true; +} - default: - return false; - } +static bool LowerFenceInst(FenceInst *FI) { + FI->eraseFromParent(); + return true; +} - assert(II->use_empty() && - "Lowering should have eliminated any uses of the intrinsic call!"); - II->eraseFromParent(); +static bool LowerLoadInst(LoadInst *LI) { + LI->setAtomic(NotAtomic); + return true; +} +static bool LowerStoreInst(StoreInst *SI) { + SI->setAtomic(NotAtomic); return true; } @@ -123,9 +113,22 @@ namespace { } bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; - for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++)) - Changed |= LowerAtomicIntrinsic(II); + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (FenceInst *FI = dyn_cast<FenceInst>(Inst)) + Changed |= LowerFenceInst(FI); + else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst)) + Changed |= LowerAtomicCmpXchgInst(CXI); + else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst)) + Changed |= LowerAtomicRMWInst(RMWI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->isAtomic()) + LowerLoadInst(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->isAtomic()) + LowerStoreInst(SI); + } + } return Changed; } }; diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 7ed3db6..eeb8931 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -54,7 +54,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, if (OpC->isZero()) continue; // No offset. // Handle struct indices, which add their field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } @@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. - if (NextStore->isVolatile()) break; + if (!NextStore->isSimple()) break; // Check to see if this stored value is of the same byte-splattable value. 
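LowerAtomicRMWInst follows the same pattern for read-modify-write operations: load the old value, compute the new one with ordinary IR (Select forms for the min/max variants), store it back, and replace uses with the old value. A condensed standalone equivalent covering a few of the operations:

#include <cassert>

// Illustrative single-threaded equivalents of a few atomicrmw operations as
// lowered above: read the old value, compute, write back, return the old
// value. Only a subset of the operations is shown.
enum RMWOp { Xchg, Add, Nand, Max, UMin };

static int loweredRMW(RMWOp op, int *ptr, int val) {
  int orig = *ptr;
  int res = orig;
  switch (op) {
  case Xchg: res = val; break;
  case Add:  res = orig + val; break;
  case Nand: res = ~(orig & val); break;
  case Max:  res = orig < val ? val : orig; break;                        // signed
  case UMin: res = (unsigned)orig < (unsigned)val ? orig : val; break;    // unsigned
  }
  *ptr = res;
  return orig;
}

int main() {
  int x = 6;
  assert(loweredRMW(Add, &x, 4) == 6 && x == 10);
  assert(loweredRMW(Max, &x, 3) == 10 && x == 10);
  assert(loweredRMW(Nand, &x, 12) == 10 && x == ~(10 & 12));
  return 0;
}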
if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) @@ -448,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { - const Type *EltType = + Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } @@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { - if (SI->isVolatile()) return false; + if (!SI->isSimple()) return false; if (TD == 0) return false; @@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { - if (!LI->isVolatile() && LI->hasOneUse() && + if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; @@ -616,7 +616,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!A->hasStructRetAttr()) return false; - const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); + Type *StructTy = cast<PointerType>(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) @@ -860,7 +860,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); - const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType(); + Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp index ee132d3..da74e9c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp @@ -180,7 +180,7 @@ static bool IsPotentialUse(const Value *Op) { Arg->hasStructRetAttr()) return false; // Only consider values with pointer types, and not function pointers. - const PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); if (!Ty || isa<FunctionType>(Ty->getElementType())) return false; // Conservatively assume anything else is a potential use. @@ -213,8 +213,8 @@ static InstructionClass GetFunctionClass(const Function *F) { const Argument *A0 = AI++; if (AI == AE) // Argument is a pointer. - if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { - const Type *ETy = PTy->getElementType(); + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { + Type *ETy = PTy->getElementType(); // Argument is i8*. if (ETy->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) @@ -234,7 +234,7 @@ static InstructionClass GetFunctionClass(const Function *F) { .Default(IC_CallOrUser); // Argument is i8** - if (const PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) if (Pte->getElementType()->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_loadWeakRetained", IC_LoadWeakRetained) @@ -246,11 +246,11 @@ static InstructionClass GetFunctionClass(const Function *F) { // Two arguments, first is i8**. 
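tryMergingIntoMemset only folds a neighbouring store when the stored value is byte-splattable, i.e. every byte of it is identical, so the whole range can be produced by a single memset. A standalone sketch of that property check (a simplified stand-in for isBytewiseValue, 32-bit values only):

#include <cstdint>
#include <cassert>

// Illustrative version of the "bytewise value" test used when merging stores
// into a memset: a value qualifies only if all of its bytes are identical,
// so the region can be filled with one repeated byte.
static bool isByteSplattable(uint32_t value, uint8_t *byteOut) {
  uint8_t b = static_cast<uint8_t>(value);
  for (int i = 1; i < 4; ++i)
    if (static_cast<uint8_t>(value >> (8 * i)) != b)
      return false;
  *byteOut = b;
  return true;
}

int main() {
  uint8_t b;
  assert(isByteSplattable(0xABABABABu, &b) && b == 0xAB);
  assert(!isByteSplattable(0x12345678u, &b));
  return 0;
}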
const Argument *A1 = AI++; if (AI == AE) - if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) - if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) if (Pte->getElementType()->isIntegerTy(8)) - if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { - const Type *ETy1 = PTy1->getElementType(); + if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); // Second argument is i8* if (ETy1->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) @@ -258,7 +258,7 @@ static InstructionClass GetFunctionClass(const Function *F) { .Case("objc_initWeak", IC_InitWeak) .Default(IC_CallOrUser); // Second argument is i8**. - if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) if (Pte1->getElementType()->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_moveWeak", IC_MoveWeak) @@ -344,6 +344,10 @@ static InstructionClass GetInstructionClass(const Value *V) { break; default: // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) if (IsPotentialUse(*OI)) @@ -421,9 +425,10 @@ static bool IsAlwaysTail(InstructionClass Class) { /// IsNoThrow - Test if the given class represents instructions which are always /// safe to mark with the nounwind attribute.. static bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. return Class == IC_Retain || Class == IC_RetainRV || - Class == IC_RetainBlock || Class == IC_Release || Class == IC_Autorelease || Class == IC_AutoreleaseRV || @@ -515,6 +520,10 @@ static bool IsObjCIdentifiedObject(const Value *V) { const Value *Pointer = StripPointerCastsAndObjCCalls(LI->getPointerOperand()); if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; StringRef Name = GV->getName(); // These special variables are known to hold values which are not // reference-counted pointers. @@ -738,7 +747,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { switch (GetBasicInstructionClass(CS.getInstruction())) { case IC_Retain: case IC_RetainRV: - case IC_RetainBlock: case IC_Autorelease: case IC_AutoreleaseRV: case IC_NoopCast: @@ -746,6 +754,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, becuase it updates + // pointers when it copies block data. return NoModRef; default: break; @@ -877,7 +887,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // usually can't sink them past other calls, which would be the main // case where it would be useful. 
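GetFunctionClass classifies ObjC runtime entry points by first checking the argument shape (for example a single i8* parameter) and then switching on the function name. A heavily simplified, standalone sketch of the name-based step, covering only a few of the names that appear above:

#include <cstring>

// Illustrative, simplified stand-in for the classification above: once the
// argument shape has been checked, runtime entry points are recognized purely
// by name; anything unrecognized is conservatively a call/user.
enum InstructionClassSketch { IC_Retain, IC_Release, IC_Autorelease, IC_CallOrUser };

static InstructionClassSketch classifyByName(const char *name) {
  if (!std::strcmp(name, "objc_retain"))      return IC_Retain;
  if (!std::strcmp(name, "objc_release"))     return IC_Release;
  if (!std::strcmp(name, "objc_autorelease")) return IC_Autorelease;
  return IC_CallOrUser;
}

int main() {
  return classifyByName("objc_retain") == IC_Retain ? 0 : 1;
}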
-/// TODO: The pointer returned from objc_loadWeakRetained is retained. +// TODO: The pointer returned from objc_loadWeakRetained is retained. + +// TODO: Delete release+retain pairs (rare). #include "llvm/GlobalAlias.h" #include "llvm/Constants.h" @@ -1098,16 +1110,16 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { if (A == S_None || B == S_None) return S_None; - // Note that we can't merge S_CanRelease and S_Use. if (A > B) std::swap(A, B); if (TopDown) { // Choose the side which is further along in the sequence. - if (A == S_Retain && (B == S_CanRelease || B == S_Use)) + if ((A == S_Retain || A == S_CanRelease) && + (B == S_CanRelease || B == S_Use)) return B; } else { // Choose the side which is further along in the sequence. if ((A == S_Use || A == S_CanRelease) && - (B == S_Release || B == S_Stop || B == S_MovableRelease)) + (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) return A; // If both sides are releases, choose the more conservative one. if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) @@ -1124,13 +1136,19 @@ namespace { /// retain-decrement-use-release sequence or release-use-decrement-retain /// reverese sequence. struct RRInfo { - /// KnownIncremented - After an objc_retain, the reference count of the - /// referenced object is known to be positive. Similarly, before an - /// objc_release, the reference count of the referenced object is known to - /// be positive. If there are retain-release pairs in code regions where the - /// retain count is known to be positive, they can be eliminated, regardless - /// of any side effects between them. - bool KnownIncremented; + /// KnownSafe - After an objc_retain, the reference count of the referenced + /// object is known to be positive. Similarly, before an objc_release, the + /// reference count of the referenced object is known to be positive. If + /// there are retain-release pairs in code regions where the retain count + /// is known to be positive, they can be eliminated, regardless of any side + /// effects between them. + /// + /// Also, a retain+release pair nested within another retain+release + /// pair all on the known same pointer value can be eliminated, regardless + /// of any intervening side effects. + /// + /// KnownSafe is true when either of these conditions is satisfied. + bool KnownSafe; /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as /// opposed to objc_retain calls). @@ -1153,7 +1171,7 @@ namespace { SmallPtrSet<Instruction *, 2> ReverseInsertPts; RRInfo() : - KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false), + KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); @@ -1161,7 +1179,7 @@ namespace { } void RRInfo::clear() { - KnownIncremented = false; + KnownSafe = false; IsRetainBlock = false; IsTailCallRelease = false; ReleaseMetadata = 0; @@ -1176,6 +1194,9 @@ namespace { /// RefCount - The known minimum number of reference count increments. unsigned RefCount; + /// NestCount - The known minimum level of retain+release nesting. + unsigned NestCount; + /// Seq - The current position in the sequence. Sequence Seq; @@ -1184,7 +1205,11 @@ namespace { /// TODO: Encapsulate this better. 
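The updated MergeSeqs keeps the side that is further along the sequence when the two predecessor states are compatible, and otherwise gives up. A conceptual toy version of the top-down merge (this is not the full ARC sequence lattice, only the shape of the rule):

#include <cassert>

// Illustrative toy: states on the same forward path merge to the later one;
// incompatible states merge to None.
enum Seq { S_None, S_Retain, S_CanRelease, S_Use };

static Seq mergeTopDown(Seq a, Seq b) {
  if (a == S_None || b == S_None) return S_None;
  if (a > b) { Seq t = a; a = b; b = t; }
  // After the swap a <= b; a retain/can-release side merged with a
  // can-release/use side advances to the later state.
  if ((a == S_Retain || a == S_CanRelease) && (b == S_CanRelease || b == S_Use))
    return b;
  return a == b ? a : S_None;
}

int main() {
  assert(mergeTopDown(S_Retain, S_Use) == S_Use);
  assert(mergeTopDown(S_CanRelease, S_CanRelease) == S_CanRelease);
  assert(mergeTopDown(S_None, S_Use) == S_None);
  return 0;
}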
RRInfo RRI; - PtrState() : RefCount(0), Seq(S_None) {} + PtrState() : RefCount(0), NestCount(0), Seq(S_None) {} + + void SetAtLeastOneRefCount() { + if (RefCount == 0) RefCount = 1; + } void IncrementRefCount() { if (RefCount != UINT_MAX) ++RefCount; @@ -1194,14 +1219,22 @@ namespace { if (RefCount != 0) --RefCount; } - void ClearRefCount() { - RefCount = 0; - } - bool IsKnownIncremented() const { return RefCount > 0; } + void IncrementNestCount() { + if (NestCount != UINT_MAX) ++NestCount; + } + + void DecrementNestCount() { + if (NestCount != 0) --NestCount; + } + + bool IsKnownNested() const { + return NestCount > 0; + } + void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1233,6 +1266,7 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); RefCount = std::min(RefCount, Other.RefCount); + NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1245,7 +1279,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) RRI.ReleaseMetadata = 0; - RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented; + RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), @@ -1316,7 +1350,7 @@ namespace { } void clearBottomUpPointers() { - PerPtrTopDown.clear(); + PerPtrBottomUp.clear(); } void clearTopDownPointers() { @@ -1334,6 +1368,12 @@ namespace { unsigned GetAllPathCount() const { return TopDownPathCount * BottomUpPathCount; } + + /// IsVisitedTopDown - Test whether the block for this BBState has been + /// visited by the top-down portion of the algorithm. + bool isVisitedTopDown() const { + return TopDownPathCount != 0; + } }; } @@ -1364,7 +1404,7 @@ void BBState::MergePred(const BBState &Other) { /*TopDown=*/true); } - // For each entry in our set, if the other set doens't have an entry with the + // For each entry in our set, if the other set doesn't have an entry with the // same key, force it to merge with an empty entry. for (ptr_iterator MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) @@ -1389,7 +1429,7 @@ void BBState::MergeSucc(const BBState &Other) { /*TopDown=*/false); } - // For each entry in our set, if the other set doens't have an entry + // For each entry in our set, if the other set doesn't have an entry // with the same key, force it to merge with an empty entry. for (ptr_iterator MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; ++MI) @@ -1406,15 +1446,11 @@ namespace { /// Run - A flag indicating whether this optimization pass should run. bool Run; - /// RetainFunc, RelaseFunc - Declarations for objc_retain, - /// objc_retainBlock, and objc_release. - Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc; - /// RetainRVCallee, etc. - Declarations for ObjC runtime /// functions, for use in creating calls to them. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, - *RetainCallee, *AutoreleaseCallee; + *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; /// UsedInThisFunciton - Flags which determine whether each of the /// interesting runtine functions is in fact used in the current function. 
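RefCount and NestCount above are both maintained as saturating counters: increments stop at UINT_MAX and decrements stop at zero, so each value is always a known lower bound. A compact standalone restatement of that discipline (hypothetical struct, mirroring the PtrState members):

#include <climits>

struct CounterModel {
  unsigned RefCount;   // Known minimum number of outstanding retain increments.
  unsigned NestCount;  // Known minimum retain+release nesting depth.

  CounterModel() : RefCount(0), NestCount(0) {}

  void SetAtLeastOneRefCount() { if (RefCount == 0) RefCount = 1; }
  void IncrementRefCount()  { if (RefCount != UINT_MAX) ++RefCount; }  // Saturate, never wrap.
  void DecrementRefCount()  { if (RefCount != 0) --RefCount; }         // Clamp at zero.
  void IncrementNestCount() { if (NestCount != UINT_MAX) ++NestCount; }
  void DecrementNestCount() { if (NestCount != 0) --NestCount; }

  bool IsKnownIncremented() const { return RefCount > 0; }
  bool IsKnownNested() const      { return NestCount > 0; }
};

KnownSafe is then derived from these lower bounds later in the patch: a retain/release pair is safe to eliminate when the count is known incremented or the pair is known to be nested inside another retain+release pair on the same pointer.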
@@ -1428,6 +1464,7 @@ namespace { Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); Constant *getRetainCallee(Module *M); + Constant *getRetainBlockCallee(Module *M); Constant *getAutoreleaseCallee(Module *M); void OptimizeRetainCall(Function &F, Instruction *Retain); @@ -1452,11 +1489,13 @@ namespace { void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts); + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M); bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases); + DenseMap<Value *, RRInfo> &Releases, + Module *M); void OptimizeWeakCalls(Function &F); @@ -1501,7 +1540,7 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1518,7 +1557,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1561,6 +1600,23 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { return RetainCallee; } +Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { + if (!RetainBlockCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + RetainBlockCallee = + M->getOrInsertFunction( + "objc_retainBlock", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainBlockCallee; +} + Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); @@ -1904,12 +1960,19 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { // Check for a return of the pointer value. 
const Value *Ptr = GetObjCArg(AutoreleaseRV); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - } + SmallVector<const Value *, 2> Users; + Users.push_back(Ptr); + do { + Ptr = Users.pop_back_val(); + for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); + UI != UE; ++UI) { + const User *I = *UI; + if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) + return; + if (isa<BitCastInst>(I)) + Users.push_back(I); + } + } while (!Users.empty()); Changed = true; ++NumPeeps; @@ -1953,7 +2016,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0))) { - const Type *Ty = CI->getArgOperand(0)->getType(); + Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), CI); @@ -1968,7 +2031,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0)) || isNullOrUndef(CI->getArgOperand(1))) { - const Type *Ty = CI->getArgOperand(0)->getType(); + Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), CI); @@ -2090,7 +2153,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { ++NumPartialNoops; // Clone the call into each predecessor that has a non-null value. CallInst *CInst = cast<CallInst>(Inst); - const Type *ParamTy = CInst->getArgOperand(0)->getType(); + Type *ParamTy = CInst->getArgOperand(0)->getType(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); @@ -2132,41 +2195,49 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) - switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) { + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { case S_None: - case S_CanRelease: - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); - SomeSuccHasSame = false; - break; + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } case S_Use: SomeSuccHasSame = true; break; case S_Stop: case S_Release: case S_MovableRelease: - AllSuccsHaveSame = false; + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; break; case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + } // If the state at the other end of any of the successor edges // matches the current state, require all edges to match. This // guards against loops in the middle of a sequence. 
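The user scan at the top of this hunk (in OptimizeAutoreleaseRVCall) replaces a single pass over Ptr's users with a small worklist so that bitcasts are looked through. The same shape, reduced to a standalone walk; Node, IsTransparent, and IsInteresting are illustrative stand-ins for Value, "is a BitCastInst", and "is a ReturnInst or objc_retainAutoreleasedReturnValue":

#include <vector>

struct Node {
  bool IsInteresting;
  bool IsTransparent;
  std::vector<const Node *> Users;
  Node() : IsInteresting(false), IsTransparent(false) {}
};

// SSA use chains of casts are acyclic, so no visited set is needed.
static bool AnyInterestingUser(const Node *Start) {
  std::vector<const Node *> Worklist(1, Start);
  do {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    for (std::vector<const Node *>::const_iterator I = N->Users.begin(),
         E = N->Users.end(); I != E; ++I) {
      if ((*I)->IsInteresting)
        return true;                 // Mirrors the early return in the diff.
      if ((*I)->IsTransparent)
        Worklist.push_back(*I);      // Look through the cast; scan its users too.
    }
  } while (!Worklist.empty());
  return false;
}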
if (SomeSuccHasSame && !AllSuccsHaveSame) - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); + S.ClearSequenceProgress(); } case S_CanRelease: { const Value *Arg = I->first; const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) - switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) { - case S_None: - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); - SomeSuccHasSame = false; - break; + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { + case S_None: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } case S_CanRelease: SomeSuccHasSame = true; break; @@ -2174,16 +2245,18 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, case S_Release: case S_MovableRelease: case S_Use: - AllSuccsHaveSame = false; + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; break; case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + } // If the state at the other end of any of the successor edges // matches the current state, require all edges to match. This // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); + S.ClearSequenceProgress(); } } } @@ -2207,6 +2280,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (Succ == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job detect trouble. if (I == BBStates.end()) continue; MyStates.InitFromSucc(I->second); @@ -2245,11 +2320,12 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); S.RRI.clear(); - S.RRI.KnownIncremented = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); S.IncrementRefCount(); + S.IncrementNestCount(); break; } case IC_RetainBlock: @@ -2259,6 +2335,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.DecrementRefCount(); + S.SetAtLeastOneRefCount(); + S.DecrementNestCount(); + + // An objc_retainBlock call with just a use still needs to be kept, + // because it may be copying a block from the stack to the heap. + if (Class == IC_RetainBlock && S.GetSeq() == S_Use) + S.SetSeq(S_CanRelease); switch (S.GetSeq()) { case S_Stop: @@ -2281,7 +2364,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } - break; + continue; } case IC_AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. @@ -2305,26 +2388,22 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, PtrState &S = MI->second; Sequence Seq = S.GetSeq(); - // Check for possible retains and releases. + // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - // Check for a retain (we're going bottom-up here). S.DecrementRefCount(); - - // Check for a release. 
- if (!IsRetain(Class) && Class != IC_RetainBlock) - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } } // Check for possible direct uses. @@ -2332,14 +2411,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, case S_Release: case S_MovableRelease: if (CanUse(Inst, Ptr, PA, Class)) { - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); S.SetSeq(S_Use); } else if (Seq == S_Release && (Class == IC_User || Class == IC_CallOrUser)) { // Non-movable releases depend on any possible objc pointer use. S.SetSeq(S_Stop); - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); } break; @@ -2378,14 +2457,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (Pred == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - if (I == BBStates.end()) + assert(I != BBStates.end()); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job detect trouble. + if (!I->second.isVisitedTopDown()) continue; MyStates.InitFromPred(I->second); while (PI != PE) { Pred = *PI++; if (Pred != BB) { I = BBStates.find(Pred); - if (I != BBStates.end()) + assert(I != BBStates.end()); + if (I->second.isVisitedTopDown()) MyStates.MergePred(I->second); } } @@ -2422,18 +2505,23 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, S.SetSeq(S_Retain); S.RRI.clear(); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownIncremented = S.IsKnownIncremented(); + // Don't check S.IsKnownIncremented() here because it's not + // sufficient. + S.RRI.KnownSafe = S.IsKnownNested(); S.RRI.Calls.insert(Inst); } + S.SetAtLeastOneRefCount(); S.IncrementRefCount(); - break; + S.IncrementNestCount(); + continue; } case IC_Release: { Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); S.DecrementRefCount(); + S.DecrementNestCount(); switch (S.GetSeq()) { case S_Retain: @@ -2478,16 +2566,12 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, Sequence Seq = S.GetSeq(); // Check for possible releases. - if (!IsRetain(Class) && Class != IC_RetainBlock && - CanAlterRefCount(Inst, Ptr, PA, Class)) { - // Check for a release. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { S.DecrementRefCount(); - - // Check for a release. switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); // One call can't cause a transition from S_Retain to S_CanRelease @@ -2511,8 +2595,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (CanUse(Inst, Ptr, PA, Class)) S.SetSeq(S_Use); break; - case S_Use: case S_Retain: + // An objc_retainBlock call may be responsible for copying the block + // data from the stack to the heap. Model this by moving it straight + // from S_Retain to S_Use. 
+ if (S.RRI.IsRetainBlock && + CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } + break; + case S_Use: case S_None: break; case S_Stop: @@ -2533,28 +2627,43 @@ ObjCARCOpt::Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases) { - // Use postorder for bottom-up, and reverse-postorder for top-down, because we + // Use reverse-postorder on the reverse CFG for bottom-up, because we // magically know that loops will be well behaved, i.e. they won't repeatedly - // call retain on a single pointer without doing a release. + // call retain on a single pointer without doing a release. We can't use + // ReversePostOrderTraversal here because we want to walk up from each + // function exit point. + SmallPtrSet<BasicBlock *, 16> Visited; + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; + SmallVector<BasicBlock *, 16> Order; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *BB = I; + if (BB->getTerminator()->getNumSuccessors() == 0) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + while (!Stack.empty()) { + pred_iterator End = pred_end(Stack.back().first); + while (Stack.back().second != End) { + BasicBlock *BB = *Stack.back().second++; + if (Visited.insert(BB)) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + Order.push_back(Stack.pop_back_val().first); + } bool BottomUpNestingDetected = false; - SmallVector<BasicBlock *, 8> PostOrder; - for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) { + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + Order.rbegin(), E = Order.rend(); I != E; ++I) { BasicBlock *BB = *I; - PostOrder.push_back(BB); - BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); } - // Iterate through the post-order in reverse order, achieving a - // reverse-postorder traversal. We don't use the ReversePostOrderTraversal - // class here because it works by computing its own full postorder iteration, - // recording the sequence, and playing it back in reverse. Since we're already - // doing a full iteration above, we can just record the sequence manually and - // avoid the cost of having ReversePostOrderTraversal compute it. + // Use regular reverse-postorder for top-down. bool TopDownNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator - RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI) - TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases); + typedef ReversePostOrderTraversal<Function *> RPOTType; + RPOTType RPOT(&F); + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + BasicBlock *BB = *I; + TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); + } return TopDownNestingDetected && BottomUpNestingDetected; } @@ -2565,12 +2674,10 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &ReleasesToMove, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts) { - const Type *ArgTy = Arg->getType(); - const Type *ParamTy = - (RetainRVFunc ? RetainRVFunc : - RetainFunc ? RetainFunc : - RetainBlockFunc)->arg_begin()->getType(); + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M) { + Type *ArgTy = Arg->getType(); + Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); // Insert the new retain and release calls. 
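The bottom-up visit order is now computed by an explicit DFS over the reverse CFG, rooted at every exit block: Order comes out in postorder of the reverse CFG, and walking it in reverse gives the reverse-postorder the new comment asks for. A standalone model of that traversal over an adjacency-list CFG; the BlockInfo representation is illustrative:

#include <cstddef>
#include <utility>
#include <vector>

struct BlockInfo {
  std::vector<std::size_t> Preds;  // Predecessor block indices.
  bool IsExit;                     // True if the block has no successors.
  BlockInfo() : IsExit(false) {}
};

// Postorder of the reverse CFG, rooted at all exit blocks.
static std::vector<std::size_t>
ReverseCFGPostOrder(const std::vector<BlockInfo> &CFG) {
  std::vector<bool> Visited(CFG.size(), false);
  std::vector<std::pair<std::size_t, std::size_t> > Stack; // (block, next pred)
  std::vector<std::size_t> Order;
  for (std::size_t BB = 0; BB != CFG.size(); ++BB)
    if (CFG[BB].IsExit) {
      Visited[BB] = true;
      Stack.push_back(std::make_pair(BB, std::size_t(0)));
    }
  while (!Stack.empty()) {
    std::size_t BB = Stack.back().first;
    if (Stack.back().second != CFG[BB].Preds.size()) {
      std::size_t Pred = CFG[BB].Preds[Stack.back().second++];
      if (!Visited[Pred]) {
        Visited[Pred] = true;
        Stack.push_back(std::make_pair(Pred, std::size_t(0)));
      }
    } else {
      Order.push_back(BB);           // All predecessors done: emit in postorder.
      Stack.pop_back();
    }
  }
  return Order;                      // Iterate in reverse for an RPO of the reverse CFG.
}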
for (SmallPtrSet<Instruction *, 2>::const_iterator @@ -2581,7 +2688,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, new BitCastInst(Arg, ParamTy, "", InsertPt); CallInst *Call = CallInst::Create(RetainsToMove.IsRetainBlock ? - RetainBlockFunc : RetainFunc, + getRetainBlockCallee(M) : getRetainCallee(M), MyArg, "", InsertPt); Call->setDoesNotThrow(); if (!RetainsToMove.IsRetainBlock) @@ -2598,8 +2705,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, // The invoke's return value isn't available in the unwind block, // but our releases will never depend on it, because they must be // paired with retains from before the invoke. - InsertPts[0] = II->getNormalDest()->getFirstNonPHI(); - InsertPts[1] = II->getUnwindDest()->getFirstNonPHI(); + InsertPts[0] = II->getNormalDest()->getFirstInsertionPt(); + InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); } else { // Insert code immediately after the last use. InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); @@ -2609,7 +2716,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *InsertPt = *I; Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt); + CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, + "", InsertPt); // Attach a clang.imprecise_release metadata tag, if appropriate. if (MDNode *M = ReleasesToMove.ReleaseMetadata) Call->setMetadata(ImpreciseReleaseMDKind, M); @@ -2640,7 +2748,8 @@ bool ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { + DenseMap<Value *, RRInfo> &Releases, + Module *M) { bool AnyPairsCompletelyEliminated = false; RRInfo RetainsToMove; RRInfo ReleasesToMove; @@ -2649,21 +2758,36 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> SmallVector<Instruction *, 8> DeadInsts; for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ) { - Value *V = (I++)->first; + E = Retains.end(); I != E; ++I) { + Value *V = I->first; if (!V) continue; // blotted Instruction *Retain = cast<Instruction>(V); Value *Arg = GetObjCArg(Retain); - // If the object being released is in static or stack storage, we know it's + // If the object being released is in static storage, we know it's // not being managed by ObjC reference counting, so we can delete pairs // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); + bool KnownSafe = isa<Constant>(Arg); + + // Same for stack storage, unless this is an objc_retainBlock call, + // which is responsible for copying the block data from the stack to + // the heap. + if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg)) + KnownSafe = true; + + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>( + StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) + if (GV->isConstant()) + KnownSafe = true; // If a pair happens in a region where it is known that the reference count // is already incremented, we can similarly ignore possible decrements. 
- bool KnownIncrementedTD = true, KnownIncrementedBU = true; + bool KnownSafeTD = true, KnownSafeBU = true; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2683,7 +2807,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; - KnownIncrementedTD &= NewRetainRRI.KnownIncremented; + KnownSafeTD &= NewRetainRRI.KnownSafe; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2739,7 +2863,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> Releases.find(NewRelease); assert(It != Releases.end()); const RRInfo &NewReleaseRRI = It->second; - KnownIncrementedBU &= NewReleaseRRI.KnownIncremented; + KnownSafeBU &= NewReleaseRRI.KnownSafe; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewReleaseRRI.Calls.begin(), LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { @@ -2787,12 +2911,19 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented, we can safely delete the pair - // regardless of what's between them. - if (KnownIncrementedTD || KnownIncrementedBU) { + // If the pointer is known incremented or nested, we can safely delete the + // pair regardless of what's between them. + if (KnownSafeTD || KnownSafeBU) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; + } else { + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + goto next_retain; } // Determine whether the original call points are balanced in the retain and @@ -2803,18 +2934,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) goto next_retain; - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - goto next_retain; - // Ok, everything checks out and we're all set. Let's move some code! Changed = true; AnyPairsCompletelyEliminated = NewCount == 0; NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts); + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); next_retain: NewReleases.clear(); @@ -2993,7 +3118,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { bool NestingDetected = Visit(F, BBStates, Retains, Releases); // Transform. - return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected; + return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && + NestingDetected; } /// OptimizeReturns - Look for this pattern: @@ -3072,7 +3198,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { // Check that there is nothing that can affect the reference // count between the retain and the call. - FindDependencies(CanChangeRetainCount, Arg, BB, Retain, + // Note that Retain need not be in BB. 
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, DependingInstructions, Visited, PA); if (DependingInstructions.size() != 1) goto next_block; @@ -3117,12 +3244,6 @@ bool ObjCARCOpt::doInitialization(Module &M) { ImpreciseReleaseMDKind = M.getContext().getMDKindID("clang.imprecise_release"); - // Identify the declarations for objc_retain and friends. - RetainFunc = M.getFunction("objc_retain"); - RetainBlockFunc = M.getFunction("objc_retainBlock"); - RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue"); - ReleaseFunc = M.getFunction("objc_release"); - // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release // calls finalizers. @@ -3132,6 +3253,7 @@ bool ObjCARCOpt::doInitialization(Module &M) { AutoreleaseRVCallee = 0; ReleaseCallee = 0; RetainCallee = 0; + RetainBlockCallee = 0; AutoreleaseCallee = 0; return false; @@ -3294,7 +3416,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -3310,7 +3432,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -3377,7 +3499,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, void ObjCARCContract::ContractRelease(Instruction *Release, inst_iterator &Iter) { LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); - if (!Load || Load->isVolatile()) return; + if (!Load || !Load->isSimple()) return; // For now, require everything to be in one basic block. BasicBlock *BB = Release->getParent(); @@ -3393,7 +3515,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) ++I; StoreInst *Store = dyn_cast<StoreInst>(I); - if (!Store || Store->isVolatile()) return; + if (!Store || !Store->isSimple()) return; if (Store->getPointerOperand() != Loc.Ptr) return; Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); @@ -3411,8 +3533,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release, ++NumStoreStrongs; LLVMContext &C = Release->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - const Type *I8XX = PointerType::getUnqual(I8X); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); Value *Args[] = { Load->getPointerOperand(), New }; if (Args[0]->getType() != I8XX) @@ -3548,7 +3670,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (Inst != UserInst && DT->dominates(Inst, UserInst)) { Changed = true; Instruction *Replacement = Inst; - const Type *UseTy = U.get()->getType(); + Type *UseTy = U.get()->getType(); if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { // For PHI nodes, insert the bitcast in the predecessor block. 
unsigned ValNo = diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index e6341ae..8f98a5b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -309,7 +309,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, std::swap(LHS, RHS); bool Success = !I->swapOperands(); assert(Success && "swapOperands failed"); - Success = false; + (void)Success; MadeChange = true; } else if (RHSBO) { // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 083412e..196a847 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -156,7 +156,7 @@ namespace { /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; - SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable. + SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. /// StructValueState - This maintains ValueState for values that have @@ -241,7 +241,7 @@ public: /// this method must be called. void AddTrackedFunction(Function *F) { // Add an entry, F -> undef. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { MRVFunctionsTracked.insert(F); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i), @@ -302,7 +302,7 @@ public: /// markAnythingOverdefined - Mark the specified value overdefined. This /// works with both scalars and structs. void markAnythingOverdefined(Value *V) { - if (const StructType *STy = dyn_cast<StructType>(V->getType())) + if (StructType *STy = dyn_cast<StructType>(V->getType())) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) markOverdefined(getStructValueState(V, i), V); else @@ -417,7 +417,7 @@ private: else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) LV.markConstant(CS->getOperand(i)); // Constants are constant. else if (isa<ConstantAggregateZero>(C)) { - const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); + Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); LV.markConstant(Constant::getNullValue(FieldTy)); } else LV.markOverdefined(); // Unknown sort of constant. @@ -471,9 +471,9 @@ private: /// UsersOfOverdefinedPHIs map for PN, remove them now. void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) { if (UsersOfOverdefinedPHIs.empty()) return; - std::multimap<PHINode*, Instruction*>::iterator It, E; - tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN); - while (It != E) { + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy It = Range.first, E = Range.second; It != E;) { if (It->second == I) UsersOfOverdefinedPHIs.erase(It++); else @@ -486,9 +486,9 @@ private: /// (Duplicate entries do not break anything directly, but can lead to /// exponential growth of the table in rare cases.) 
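RemoveFromOverdefinedPHIs above (and InsertInOverdefinedPHIs just below) now iterate an explicit equal_range pair instead of the old tie() form; the only subtlety is erasing from a multimap while walking the range. A self-contained version of that erase-while-iterating idiom:

#include <map>
#include <string>
#include <utility>

// Remove every entry under Key whose mapped value equals Val. The iterator is
// post-incremented before erase, so the loop variable is never invalidated.
static void EraseMatching(std::multimap<std::string, int> &M,
                          const std::string &Key, int Val) {
  typedef std::multimap<std::string, int>::iterator ItTy;
  std::pair<ItTy, ItTy> Range = M.equal_range(Key);
  for (ItTy It = Range.first, E = Range.second; It != E;) {
    if (It->second == Val)
      M.erase(It++);
    else
      ++It;
  }
}

The same range-pair shape appears in visitPHINode further down, which snapshots the range into a SmallVector before visiting the users, since visiting can mutate the map.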
void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) { - std::multimap<PHINode*, Instruction*>::iterator J, E; - tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN); - for (; J != E; ++J) + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy J = Range.first, E = Range.second; J != E; ++J) if (J->second == I) return; UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I)); @@ -515,6 +515,7 @@ private: void visitShuffleVectorInst(ShuffleVectorInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); } // Instructions that cannot be folded away. void visitStoreInst (StoreInst &I); @@ -528,8 +529,12 @@ private: visitTerminatorInst(II); } void visitCallSite (CallSite CS); + void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } + void visitFenceInst (FenceInst &I) { /*returns void*/ } + void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); } + void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } @@ -577,6 +582,10 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, } if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) { + if (TI.getNumSuccessors() < 2) { + Succs[0] = true; + return; + } LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -637,6 +646,9 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { return true; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + if (SI->getNumSuccessors() < 2) + return true; + LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -692,13 +704,14 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // There may be instructions using this PHI node that are not overdefined // themselves. If so, make sure that they know that the PHI node operand // changed. - std::multimap<PHINode*, Instruction*>::iterator I, E; - tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN); - if (I == E) + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN); + + if (Range.first == Range.second) return; SmallVector<Instruction*, 16> Users; - for (; I != E; ++I) + for (ItTy I = Range.first, E = Range.second; I != E; ++I) Users.push_back(I->second); while (!Users.empty()) visit(Users.pop_back_val()); @@ -772,7 +785,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { // Handle functions that return multiple values. 
if (!TrackedMultipleRetVals.empty()) { - if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType())) + if (StructType *STy = dyn_cast<StructType>(ResultOp->getType())) if (MRVFunctionsTracked.count(F)) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, @@ -825,7 +838,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { } void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { - const StructType *STy = dyn_cast<StructType>(IVI.getType()); + StructType *STy = dyn_cast<StructType>(IVI.getType()); if (STy == 0) return markOverdefined(&IVI); @@ -925,7 +938,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { // Could annihilate value. if (I.getOpcode() == Instruction::And) markConstant(IV, &I, Constant::getNullValue(I.getType())); - else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) + else if (VectorType *PT = dyn_cast<VectorType>(I.getType())) markConstant(IV, &I, Constant::getAllOnesValue(PT)); else markConstant(IV, &I, @@ -1179,8 +1192,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { } Constant *Ptr = Operands[0]; - markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1, - Operands.size()-1)); + ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end()); + markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices)); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1278,7 +1291,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) + if (Constant *C = ConstantFoldCall(F, Operands)) return markConstant(I, C); } @@ -1303,7 +1316,7 @@ CallOverdefined: continue; } - if (const StructType *STy = dyn_cast<StructType>(AI->getType())) { + if (StructType *STy = dyn_cast<StructType>(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { LatticeVal CallArg = getStructValueState(*CAI, i); mergeInValue(getStructValueState(AI, i), AI, CallArg); @@ -1315,7 +1328,7 @@ CallOverdefined: } // If this is a single/zero retval case, see if we're tracking the function. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { if (!MRVFunctionsTracked.count(F)) goto CallOverdefined; // Not tracking this callee. @@ -1419,67 +1432,116 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Look for instructions which produce undef values. if (I->getType()->isVoidTy()) continue; - if (const StructType *STy = dyn_cast<StructType>(I->getType())) { - // Only a few things that can be structs matter for undef. Just send - // all their results to overdefined. We could be more precise than this - // but it isn't worth bothering. - if (isa<CallInst>(I) || isa<SelectInst>(I)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - LatticeVal &LV = getStructValueState(I, i); - if (LV.isUndefined()) - markOverdefined(LV, I); - } + if (StructType *STy = dyn_cast<StructType>(I->getType())) { + // Only a few things that can be structs matter for undef. + + // Tracked calls must never be marked overdefined in ResolvedUndefsIn. + if (CallSite CS = CallSite(I)) + if (Function *F = CS.getCalledFunction()) + if (MRVFunctionsTracked.count(F)) + continue; + + // extractvalue and insertvalue don't need to be marked; they are + // tracked as precisely as their operands. 
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I)) + continue; + + // Send the results of everything else to overdefined. We could be + // more precise than this but it isn't worth bothering. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + LatticeVal &LV = getStructValueState(I, i); + if (LV.isUndefined()) + markOverdefined(LV, I); } continue; } - + LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; - // No instructions using structs need disambiguation. - if (I->getOperand(0)->getType()->isStructTy()) + // extractvalue is safe; check here because the argument is a struct. + if (isa<ExtractValueInst>(I)) continue; - // Get the lattice values of the first two operands for use below. + // Compute the operand LatticeVals, for convenience below. + // Anything taking a struct is conservatively assumed to require + // overdefined markings. + if (I->getOperand(0)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } LatticeVal Op0LV = getValueState(I->getOperand(0)); LatticeVal Op1LV; if (I->getNumOperands() == 2) { - // No instructions using structs need disambiguation. - if (I->getOperand(1)->getType()->isStructTy()) - continue; - - // If this is a two-operand instruction, and if both operands are - // undefs, the result stays undef. + if (I->getOperand(1)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } + Op1LV = getValueState(I->getOperand(1)); - if (Op0LV.isUndefined() && Op1LV.isUndefined()) - continue; } - // If this is an instructions whose result is defined even if the input is // not fully defined, propagate the information. - const Type *ITy = I->getType(); + Type *ITy = I->getType(); switch (I->getOpcode()) { - default: break; // Leave the instruction as an undef. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + break; // Any undef -> undef + case Instruction::FSub: + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Floating-point binary operation: be conservative. + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + markForcedConstant(I, Constant::getNullValue(ITy)); + else + markOverdefined(I); + return true; case Instruction::ZExt: - // After a zero extend, we know the top part is zero. SExt doesn't have - // to be handled here, because we don't know whether the top part is 1's - // or 0's. - case Instruction::SIToFP: // some FP values are not possible, just use 0. - case Instruction::UIToFP: // some FP values are not possible, just use 0. + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + // undef -> 0; some outputs are impossible markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Or: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef | X -> -1. X could be -1. 
markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; + case Instruction::Xor: + // undef ^ undef -> 0; strictly speaking, this is not strictly + // necessary, but we try to be nice to people who expect this + // behavior in simple cases + if (Op0LV.isUndefined() && Op1LV.isUndefined()) { + markForcedConstant(I, Constant::getNullValue(ITy)); + return true; + } + // undef ^ X -> undef + break; + case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: @@ -1494,26 +1556,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { return true; case Instruction::AShr: - // undef >>s X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >>s undef -> X. X could be 0, X could have the high-bit known set. - if (Op0LV.isConstant()) - markForcedConstant(I, Op0LV.getConstant()); - else - markOverdefined(I); + // X >>a undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef >>a X -> all ones + markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; case Instruction::LShr: case Instruction::Shl: - // undef >> X -> undef. No change. - // undef << X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >> undef -> 0. X could be 0. - // X << undef -> 0. X could be 0. + // X << undef -> undef. + // X >> undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef << X -> 0 + // undef >> X -> 0 markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Select: + Op1LV = getValueState(I->getOperand(1)); // undef ? X : Y -> X or Y. There could be commonality between X/Y. if (Op0LV.isUndefined()) { if (!Op1LV.isConstant()) // Pick the constant one if there is any. @@ -1533,9 +1593,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { else markOverdefined(I); return true; + case Instruction::Load: + // A load here means one of two things: a load of undef from a global, + // a load from an unknown pointer. Either way, having it return undef + // is okay. + break; + case Instruction::ICmp: + // X == undef -> undef. Other comparisons get more complicated. + if (cast<ICmpInst>(I)->isEquality()) + break; + markOverdefined(I); + return true; case Instruction::Call: - // If a call has an undef result, it is because it is constant foldable - // but one of the inputs was undef. Just force the result to + case Instruction::Invoke: { + // There are two reasons a call can have an undef result + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in ResolvedUndefsIn. + if (Function *F = CallSite(I).getCalledFunction()) + if (TrackedRetVals.count(F)) + break; + + // If the call is constant-foldable, we mark it overdefined because + // we do not know what return values are valid. + markOverdefined(I); + return true; + } + default: + // If we don't know what should happen here, conservatively mark it // overdefined. markOverdefined(I); return true; @@ -1621,15 +1707,25 @@ FunctionPass *llvm::createSCCPPass() { static void DeleteInstructionInBlock(BasicBlock *BB) { DEBUG(dbgs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; - - // Delete the instructions backwards, as it has a reduced likelihood of - // having to update as many def-use and use-def chains. 
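The forced constants chosen by ResolvedUndefsIn above are exactly the values that make the result independent of the other, still-unknown operand, so the lattice never has to move again for that instruction. A small standalone check of a few of those identities; it assumes the usual arithmetic behaviour of signed right shift, which matches how LLVM defines ashr:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = { 0u, 1u, 0x80000000u, 0xdeadbeefu, ~0u };
  for (unsigned i = 0; i != sizeof(Samples) / sizeof(Samples[0]); ++i) {
    uint32_t X = Samples[i];
    assert((0u & X) == 0u);    // undef & X -> forced to 0: result is 0 whatever X is.
    assert((~0u | X) == ~0u);  // undef | X -> forced to all-ones.
    assert((0u * X) == 0u);    // undef * X -> forced to 0.
  }
  for (unsigned Shift = 0; Shift != 32; ++Shift) {
    assert(uint32_t(int32_t(~0u) >> Shift) == ~0u); // undef >>a X -> all-ones.
    assert((0u >> Shift) == 0u);                    // undef >>l X -> 0.
    assert((0u << Shift) == 0u);                    // undef <<  X -> 0.
  }
  return 0;
}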
- while (!isa<TerminatorInst>(BB->begin())) { - Instruction *I = --BasicBlock::iterator(BB->getTerminator()); - - if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - BB->getInstList().erase(I); + + // Check to see if there are non-terminating instructions to delete. + if (isa<TerminatorInst>(BB->begin())) + return; + + // Delete the instructions backwards, as it has a reduced likelihood of having + // to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; + } + BB->getInstList().erase(Inst); ++NumInstRemoved; } } diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index 302c287..f6918de 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -63,7 +63,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); - initializeTailDupPass(Registry); initializeTailCallElimPass(Registry); } @@ -187,3 +186,7 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBasicAliasAnalysisPass()); } + +void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerExpectIntrinsicPass()); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7d6349c..c6d9123 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -129,11 +129,11 @@ namespace { AllocaInfo &Info); void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, AllocaInfo &Info, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy); + bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy); void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); @@ -145,6 +145,9 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); + void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); @@ -253,7 +256,7 @@ class ConvertToScalarInfo { /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. 
- const VectorType *VectorTy; + VectorType *VectorTy; /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. We don't want to turn structs into @@ -269,11 +272,11 @@ public: private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset); - bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); + void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); + bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, uint64_t Offset, IRBuilder<> &Builder); @@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { if (ScalarKind == Unknown) ScalarKind = Integer; - // FIXME: It should be possible to promote the vector type up to the alloca's - // size. if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) ScalarKind = Integer; @@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. - const Type *NewTy; + Type *NewTy; if (ScalarKind == Vector) { assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " @@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// (VectorTy) so far at the offset specified by Offset (which is specified in /// bytes). /// -/// There are three cases we handle here: +/// There are two cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A union of vector types with power-of-2 size differences, e.g. a float, -/// <2 x float> and <4 x float>. Here we turn element accesses into insert -/// and extract element operations, and <2 x float> accesses into a cast to -/// <2 x double>, an extract, and a cast back to <2 x float>. -/// 3) A fully general blob of memory, which we turn into some (potentially +/// 2) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. @@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. 
- if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { + if (VectorType *VInTy = dyn_cast<VectorType>(In)) { if (MergeInVectorType(VInTy, Offset)) return; } else if (In->isFloatTy() || In->isDoubleTy() || @@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // if the implied vector agrees with what we already have and if Offset is // compatible with it. if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) { + (!VectorTy || EltSize == VectorTy->getElementType() + ->getPrimitiveSizeInBits()/8)) { if (!VectorTy) { ScalarKind = ImplicitVector; VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; } - - unsigned CurrentEltSize = VectorTy->getElementType() - ->getPrimitiveSizeInBits()/8; - if (EltSize == CurrentEltSize) - return; - - if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) - return; + return; } } @@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, /// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore, /// returning true if the type was successfully merged and false otherwise. -bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, +bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, uint64_t Offset) { - // TODO: Support nonzero offsets? - if (Offset != 0) - return false; - - // Only allow vectors that are a power-of-2 away from the size of the alloca. - if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) - return false; - - // If this the first vector we see, remember the type so that we know the - // element size. - if (!VectorTy) { + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. + if (!VectorTy) + VectorTy = VInTy; ScalarKind = Vector; - VectorTy = VInTy; return true; } - unsigned BitWidth = VectorTy->getBitWidth(); - unsigned InBitWidth = VInTy->getBitWidth(); - - // Vectors of the same size can be converted using a simple bitcast. - if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) { - ScalarKind = Vector; - return true; - } - - const Type *ElementTy = VectorTy->getElementType(); - const Type *InElementTy = VInTy->getElementType(); - - // Do not allow mixed integer and floating-point accesses from vectors of - // different sizes. - if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) - return false; - - if (ElementTy->isFloatingPointTy()) { - // Only allow floating-point vectors of different sizes if they have the - // same element type. - // TODO: This could be loosened a bit, but would anything benefit? - if (ElementTy != InElementTy) - return false; - - // There are no arbitrary-precision floating-point types, which limits the - // number of legal vector types with larger element types that we can form - // to bitcast and extract a subvector. - // TODO: We could support some more cases with mixed fp128 and double here. 
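MergeInTypeForLoadOrStore above now only folds a scalar access into the implied vector when the access lands on an element boundary, tiles the alloca evenly, and matches the element size already chosen (or no vector has been chosen yet). That acceptance condition, pulled out as a standalone predicate; names are illustrative and sizes are in bytes:

#include <cstdint>

// ExistingEltSize is 0 when no vector type has been picked for the alloca yet.
static bool FitsImpliedVector(uint64_t AllocaSize, uint64_t Offset,
                              uint64_t EltSize, uint64_t ExistingEltSize) {
  if (EltSize == 0 || Offset % EltSize != 0 || AllocaSize % EltSize != 0)
    return false;              // Must land on an element boundary and tile the alloca.
  return ExistingEltSize == 0 || ExistingEltSize == EltSize;
}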
- if (!(BitWidth == 64 || BitWidth == 128) || - !(InBitWidth == 64 || InBitWidth == 128)) - return false; - } else { - assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " - "or floating-point."); - unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); - unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); - - // Do not allow integer types smaller than a byte or types whose widths are - // not a multiple of a byte. - if (BitWidth < 8 || InBitWidth < 8 || - BitWidth % 8 != 0 || InBitWidth % 8 != 0) - return false; - } - - // Pick the largest of the two vector types. - ScalarKind = Vector; - if (InBitWidth > BitWidth) - VectorTy = VInTy; - - return true; + return false; } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // Don't break volatile loads. - if (LI->isVolatile()) + if (!LI->isSimple()) return false; // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) @@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? - if (SI->getOperand(0) == V || SI->isVolatile()) return false; + if (SI->getOperand(0) == V || !SI->isSimple()) return false; // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; @@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { } if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - IsNotTrivial = true; // Can't be mem2reg'd. + if (!onlyUsedByLifetimeMarkers(BCI)) + IsNotTrivial = true; // Can't be mem2reg'd. if (!CanConvertToScalar(BCI, Offset)) return false; continue; @@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); // See if all uses can be converted. if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; @@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { continue; } + // If this is a lifetime intrinsic, we can handle it. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + continue; + } + } + // Otherwise, we cannot handle this! return false; } @@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); continue; @@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // The load is a bit extract from NewAI shifted right by Offset bits. 
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *LoadedVal = Builder.CreateLoad(NewAI); Value *NewLoadVal = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); LI->replaceAllUsesWith(NewLoadVal); @@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // pointer (bitcasted), then a store to our new alloca. assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); Value *SrcPtr = MTI->getSource(); - const PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), SPTy->getAddressSpace()); @@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), DPTy->getAddressSpace()); @@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, continue; } - llvm_unreachable("Unsupported operation!"); - } -} + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + // There's no need to preserve these, as the resulting alloca will be + // converted to a register anyways. + II->eraseFromParent(); + continue; + } + } -/// getScaledElementType - Gets a scaled element type for a partial vector -/// access of an alloca. The input types must be integer or floating-point -/// scalar or vector types, and the resulting type is an integer, float or -/// double. -static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, - unsigned NewBitWidth) { - bool IsFP1 = Ty1->isFloatingPointTy() || - (Ty1->isVectorTy() && - cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); - bool IsFP2 = Ty2->isFloatingPointTy() || - (Ty2->isVectorTy() && - cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); - - LLVMContext &Context = Ty1->getContext(); - - // Prefer floating-point types over integer types, as integer types may have - // been created by earlier scalar replacement. - if (IsFP1 || IsFP2) { - if (NewBitWidth == 32) - return Type::getFloatTy(Context); - if (NewBitWidth == 64) - return Type::getDoubleTy(Context); + llvm_unreachable("Unsupported operation!"); } - - return Type::getIntNTy(Context, NewBitWidth); -} - -/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector -/// to another vector of the same element type which has the same allocation -/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). 
-static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, - IRBuilder<> &Builder) { - const Type *FromType = FromVal->getType(); - const VectorType *FromVTy = cast<VectorType>(FromType); - const VectorType *ToVTy = cast<VectorType>(ToType); - assert((ToVTy->getElementType() == FromVTy->getElementType()) && - "Vectors must have the same element type"); - Value *UnV = UndefValue::get(FromType); - unsigned numEltsFrom = FromVTy->getNumElements(); - unsigned numEltsTo = ToVTy->getNumElements(); - - SmallVector<Constant*, 3> Args; - const Type* Int32Ty = Builder.getInt32Ty(); - unsigned minNumElts = std::min(numEltsFrom, numEltsTo); - unsigned i; - for (i=0; i != minNumElts; ++i) - Args.push_back(ConstantInt::get(Int32Ty, i)); - - if (i < numEltsTo) { - Constant* UnC = UndefValue::get(Int32Ty); - for (; i != numEltsTo; ++i) - Args.push_back(UnC); - } - Constant *Mask = ConstantVector::get(Args); - return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); } /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer @@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. Value *ConvertToScalarInfo:: -ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, +ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - const Type *FromType = FromVal->getType(); + Type *FromType = FromVal->getType(); if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + if (VectorType *VTy = dyn_cast<VectorType>(FromType)) { unsigned FromTypeSize = TD.getTypeAllocSize(FromType); unsigned ToTypeSize = TD.getTypeAllocSize(ToType); - if (FromTypeSize == ToTypeSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. - if (FromType->getPrimitiveSizeInBits() == - ToType->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); - else - return CreateShuffleVectorCast(FromVal, ToType, Builder); - } - - if (isPowerOf2_64(FromTypeSize / ToTypeSize)) { - assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " - "of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(FromType, ToType, - ToTypeSize * 8); - unsigned NumCastVectorElements = FromTypeSize / ToTypeSize; - - LLVMContext &Context = FromVal->getContext(); - const Type *CastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Extract, ToType, "tmp"); - } + if (FromTypeSize == ToTypeSize) + return Builder.CreateBitCast(FromVal, ToType); // Otherwise it must be an element access. 
unsigned Elt = 0; @@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType, "tmp"); + V = Builder.CreateBitCast(V, ToType); return V; } // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. - if (const StructType *ST = dyn_cast<StructType>(ToType)) { + if (StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + IntegerType *NTy = cast<IntegerType>(FromVal->getType()); // If this is a big-endian system and the load is narrower than the // full alloca type, we need to do a shift to get the right bits. @@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), ShAmt)); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), -ShAmt)); // Finally, unconditionally truncate the integer to the right width. unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); else if (LIBitWidth > NTy->getBitWidth()) FromVal = Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); // If the result is an integer, this is a trunc or bitcast. if (ToType->isIntegerTy()) { // Should be done. } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + FromVal = Builder.CreateBitCast(FromVal, ToType); } else { // Otherwise must be a pointer. 
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + FromVal = Builder.CreateIntToPtr(FromVal, ToType); } assert(FromVal->getType() == ToType && "Didn't convert right?"); return FromVal; @@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t Offset, IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. - const Type *AllocaType = Old->getType(); + Type *AllocaType = Old->getType(); LLVMContext &Context = Old->getContext(); - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. - if (VTy->getPrimitiveSizeInBits() == - SV->getType()->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); - else - return CreateShuffleVectorCast(SV, VTy, Builder); - } - - if (isPowerOf2_64(VecSize / ValSize)) { - assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " - "value of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), - ValSize); - unsigned NumCastVectorElements = VecSize / ValSize; - - LLVMContext &Context = SV->getContext(); - const Type *OldCastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - - Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Insert = - Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Insert, AllocaType, "tmp"); - } + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType); // Must be an element insertion. assert(SV->getType() == VTy->getElementType()); uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); unsigned Elt = Offset/EltSize; - return Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), - "tmp"); + return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); } // If SV is a first-class aggregate value, insert each value recursively. 
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + if (StructType *ST = dyn_cast<StructType>(SV->getType())) { const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), Builder); @@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, return Old; } - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); } return Old; @@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { if (SV->getType()->getPrimitiveSizeInBits() < AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType, "tmp"); + SV = Builder.CreateZExt(SV, AllocaType); else { // Truncation may be needed if storing more than the alloca can hold // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); + SV = Builder.CreateTrunc(SV, AllocaType); SrcWidth = DestWidth; SrcStoreWidth = DestStoreWidth; } @@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt), "tmp"); + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt)); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt), "tmp"); + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt)); Mask = Mask.lshr(-ShAmt); } @@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. 
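A pattern worth calling out before the next hunk: throughout this patch, LI->isVolatile() guards become !LI->isSimple(), so the new atomic loads and stores are rejected alongside volatile ones. A minimal sketch of that filter, assuming the headers the patched file already includes; the helper name is illustrative and not part of the patch:

static bool allUsesAreSimpleLoads(Value *V) {
  // "Simple" means neither volatile nor atomic; anything else carries ordering
  // or side-effect constraints, so treat it as a blocker.
  for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
       UI != UE; ++UI) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI);
    if (LI == 0 || !LI->isSimple())
      return false;
  }
  return true;
}

Both isSafeSelectToSpeculate above and isSafePHIToSpeculate below apply exactly this check to the loads that use the select or PHI.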
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { // trapping load in the predecessor if it is a critical edge. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); + Value *InVal = PN->getIncomingValue(i); + + // If the terminator of the predecessor has side-effects (an invoke), + // there is no safe place to put a load in the predecessor. + if (Pred->getTerminator()->mayHaveSideEffects()) + return false; + + // If the value is produced by the terminator of the predecessor + // (an invoke), there is no valid place to put a load in the predecessor. + if (Pred->getTerminator() == InVal) + return false; // If the predecessor has a single successor, then the edge isn't critical. if (Pred->getTerminator()->getNumSuccessors() == 1) continue; - - Value *InVal = PN->getIncomingValue(i); - - // If the InVal is an invoke in the pred, we can't put a load on the edge. - if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) - if (II->getParent() == Pred) - return false; // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. @@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { UI != UE; ++UI) { User *U = *UI; if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return false; continue; } if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI || SI->isVolatile()) + if (SI->getOperand(0) == AI || !SI->isSimple()) return false; // Don't allow a store OF the AI, only INTO the AI. continue; } @@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (onlyUsedByLifetimeMarkers(BCI)) { + InstsToRewrite.insert(BCI); + continue; + } + } + return false; } @@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // If we have instructions that need to be rewritten for this to be promotable // take care of it now. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) { + // This could only be a bitcast used by nothing but lifetime intrinsics. + for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end(); + I != E;) { + Use &U = I.getUse(); + ++I; + cast<Instruction>(U.getUser())->eraseFromParent(); + } + BCI->eraseFromParent(); + continue; + } + if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { // Selects in InstsToRewrite only have load uses. Rewrite each as two // loads with a new select. 
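A hedged sketch of that select rewrite, under the assumption that every use of the select is a simple load. The function and value names are illustrative, not from the patch, and details such as load alignment that a real rewrite would need to carry over are omitted:

static void rewriteLoadOfSelect(SelectInst *SI, LoadInst *LI) {
  // Turn "load (select c, p, q)" into "select c, (load p), (load q)" so that
  // the pointers feeding the select can come from promotable allocas.
  IRBuilder<> Builder(LI);
  Value *TV = Builder.CreateLoad(SI->getTrueValue(),  LI->getName() + ".t");
  Value *FV = Builder.CreateLoad(SI->getFalseValue(), LI->getName() + ".f");
  Value *V  = Builder.CreateSelect(SI->getCondition(), TV, FV, LI->getName());
  LI->replaceAllUsesWith(V);
  LI->eraseFromParent();
}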
@@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } - const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); + Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), PN->getName()+".ld", PN); @@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) { /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { - const Type *T = AI->getAllocatedType(); + Type *T = AI->getAllocatedType(); // Do not promote any struct into more than 32 separate vars. - if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) return ST->getNumElements() <= 32; // Arrays are much less likely to be safe for SROA; only consider // them if they are very small. - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + if (ArrayType *AT = dyn_cast<ArrayType>(T)) return AT->getNumElements() <= 8; return false; } @@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; - if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, @@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, WorkList.push_back(NA); // Add to worklist for recursive processing } } else { - const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); + ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); ElementAllocas.reserve(AT->getNumElements()); - const Type *ElTy = AT->getElementType(); + Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), AI->getName() + "." 
+ Twine(i), AI); @@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, UI.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return MarkUnsafe(Info, User); } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { isSafePHISelectUseForScalarRepl(User, Offset, Info); } else { @@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, return MarkUnsafe(Info, User); isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; @@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) MarkUnsafe(Info, GEPI); } @@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, /// elements of the same type (which is always true for arrays). If so, /// return true with NumElts and EltTy set to the number of elements and the /// element type, respectively. -static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, - const Type *&EltTy) { - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { +static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, + Type *&EltTy) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) { NumElts = AT->getNumElements(); EltTy = (NumElts == 0 ? 
0 : AT->getElementType()); return true; } - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { NumElts = ST->getNumContainedTypes(); EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { @@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are /// "homogeneous" aggregates with the same element type and number of elements. -static bool isCompatibleAggregate(const Type *T1, const Type *T2) { +static bool isCompatibleAggregate(Type *T1, Type *T2) { if (T1 == T2) return true; unsigned NumElts1, NumElts2; - const Type *EltTy1, *EltTy2; + Type *EltTy1, *EltTy2; if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && isHomogeneousAggregate(T2, NumElts2, EltTy2) && NumElts1 == NumElts2 && @@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) { /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a /// unit. If false, it only allows accesses known to be in a single element. void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess) { // Check if this is a load/store of the entire alloca. @@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, } } // Check if the offset/size correspond to a component within the alloca type. - const Type *T = Info.AI->getAllocatedType(); + Type *T = Info.AI->getAllocatedType(); if (TypeHasComponent(T, Offset, MemSize)) { Info.hasSubelementAccess = true; return; @@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, /// TypeHasComponent - Return true if T has a component type with the /// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; +bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { + Type *EltTy; uint64_t EltSize; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); unsigned EltIdx = Layout->getElementContainingOffset(Offset); EltTy = ST->getContainedType(EltIdx); EltSize = TD->getTypeAllocSize(EltTy); Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { EltTy = AT->getElementType(); EltSize = TD->getTypeAllocSize(EltTy); if (Offset >= AT->getNumElements() * EltSize) @@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // address operand will be updated, so nothing else needs to be done. 
continue; } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); + } + continue; + } if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: @@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); + Type *SIType = Val->getType(); if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc @@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, /// Sets T to the type of the element and Offset to the offset within that /// element. IdxTy is set to the type of the index result to be used in a /// GEP instruction. -uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy) { +uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy) { uint64_t Idx = 0; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); Idx = Layout->getElementContainingOffset(Offset); T = ST->getContainedType(Idx); @@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, IdxTy = Type::getInt32Ty(T->getContext()); return Idx; } - const ArrayType *AT = cast<ArrayType>(T); + ArrayType *AT = cast<ArrayType>(T); T = AT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; @@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { uint64_t OldOffset = Offset; SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - const Type *T = AI->getAllocatedType(); - const Type *IdxTy; + Type *T = AI->getAllocatedType(); + Type *IdxTy; uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); if (GEPI->getOperand(0) == AI) OldIdx = ~0ULL; // Force the GEP to be rewritten. @@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, if (Idx == OldIdx) return; - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + Type *i32Ty = Type::getInt32Ty(AI->getContext()); SmallVector<Value*, 8> NewArgs; NewArgs.push_back(Constant::getNullValue(i32Ty)); while (EltOffset != 0) { @@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), "", GEPI); + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) @@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, DeadInsts.push_back(GEPI); } +/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. 
Rewrite it +/// to mark the lifetime of the scalarized memory. +void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0)); + // Put matching lifetime markers on everything from Offset up to + // Offset+OldSize. + Type *AIType = AI->getAllocatedType(); + uint64_t NewOffset = Offset; + Type *IdxTy; + uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); + + IRBuilder<> Builder(II); + uint64_t Size = OldSize->getLimitedValue(); + + if (NewOffset) { + // Splice the first element and index 'NewOffset' bytes in. SROA will + // split the alloca again later. + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset; + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); + ++Idx; + } + + for (; Idx != NewElts.size() && Size; ++Idx) { + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy); + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(NewElts[Idx], + Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(NewElts[Idx], + Builder.getInt64(EltSize)); + } + DeadInsts.push_back(II); +} + /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, @@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the pointer is not the right type, insert a bitcast to the right // type. 
- const Type *NewTy = + Type *NewTy = PointerType::get(AI->getType()->getElementType(), AddrSpace); if (OtherPtr->getType() != NewTy) @@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; - const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - const Type *OtherTy = OtherPtrTy->getElementType(); - if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { + PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); + Type *OtherTy = OtherPtrTy->getElementType(); + if (StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); + Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } @@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } Value *EltPtr = NewElts[i]; - const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); + Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { @@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy->getScalarType(); + Type *ValTy = EltTy->getScalarType(); // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); @@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. - if (EltTy != ValTy) { - unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); + if (EltTy->isVectorTy()) { + unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); StoreVal = ConstantVector::get(Elts); } @@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); IRBuilder<> Builder(SI); @@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { const StructLayout *Layout = TD->getStructLayout(EltSTy); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. 
- const Type *FieldTy = EltSTy->getElementType(i); + Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); if (TD->isBigEndian()) @@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } } else { - const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); - const Type *ArrayEltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(AllocaEltTy); + Type *ArrayEltTy = ATy->getElementType(); uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); @@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI @@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // have different ways to compute the element offset. const StructLayout *Layout = 0; uint64_t ArrayEltBitOffset = 0; - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = TD->getStructLayout(EltSTy); } else { - const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); + Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } @@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // Load the value from the alloca. If the NewElt is an aggregate, cast // the pointer to an integer of the same size before doing the load. Value *SrcField = NewElts[i]; - const Type *FieldTy = + Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && !FieldTy->isVectorTy()) @@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding in between the elements that would be split apart /// by SROA; return false otherwise. -static bool HasPadding(const Type *Ty, const TargetData &TD) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { +static bool HasPadding(Type *Ty, const TargetData &TD) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Ty = ATy->getElementType(); return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } // SROA currently handles only Arrays and Structs. - const StructType *STy = cast<StructType>(Ty); + StructType *STy = cast<StructType>(Ty); const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // and fusion code. if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { // If the struct/array just has one element, use basic SRoA. 
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { if (ST->getNumElements() > 1) return false; } else { if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1) @@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Ignore non-volatile loads, they are always ok. - if (LI->isVolatile()) return false; + if (!LI->isSimple()) return false; continue; } diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7c415e5..fbb9465 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -134,7 +134,7 @@ namespace { struct StrCatOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcat" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -184,7 +184,7 @@ struct StrCatOpt : public LibCallOptimization { struct StrNCatOpt : public StrCatOpt { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncat" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -232,7 +232,7 @@ struct StrNCatOpt : public StrCatOpt { struct StrChrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strchr" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -282,7 +282,7 @@ struct StrChrOpt : public LibCallOptimization { struct StrRChrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strrchr" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -323,7 +323,7 @@ struct StrRChrOpt : public LibCallOptimization { struct StrCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcmp" function prototype. 
- const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || @@ -338,16 +338,17 @@ struct StrCmpOpt : public LibCallOptimization { bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x - return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - - if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - // strcmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) return ConstantInt::get(CI->getType(), - strcmp(Str1.c_str(),Str2.c_str())); + StringRef(Str1).compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); @@ -371,7 +372,7 @@ struct StrCmpOpt : public LibCallOptimization { struct StrNCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncmp" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || @@ -400,16 +401,20 @@ struct StrNCmpOpt : public LibCallOptimization { bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x - return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = StringRef(Str1).substr(0, Length); + StringRef SubStr2 = StringRef(Str2).substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - // strncmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), - strncmp(Str1.c_str(), Str2.c_str(), Length)); return 0; } }; @@ -426,7 +431,7 @@ struct StrCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcpy" function prototype. unsigned NumParams = OptChkCall ? 
3 : 2; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != NumParams || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || @@ -462,7 +467,7 @@ struct StrCpyOpt : public LibCallOptimization { struct StrNCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || @@ -511,7 +516,7 @@ struct StrNCpyOpt : public LibCallOptimization { struct StrLenOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) @@ -537,7 +542,7 @@ struct StrLenOpt : public LibCallOptimization { struct StrPBrkOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -575,7 +580,7 @@ struct StrPBrkOpt : public LibCallOptimization { struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) @@ -597,7 +602,7 @@ struct StrToOpt : public LibCallOptimization { struct StrSpnOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -626,7 +631,7 @@ struct StrSpnOpt : public LibCallOptimization { struct StrCSpnOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -658,7 +663,7 @@ struct StrCSpnOpt : public LibCallOptimization { struct StrStrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -722,7 +727,7 @@ struct StrStrOpt : public LibCallOptimization { struct MemCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || 
!FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) @@ -773,7 +778,7 @@ struct MemCpyOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -795,7 +800,7 @@ struct MemMoveOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -817,7 +822,7 @@ struct MemSetOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || @@ -840,7 +845,7 @@ struct MemSetOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || @@ -874,8 +879,8 @@ struct PowOpt : public LibCallOptimization { Callee->getAttributes()); Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); - Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp"); - Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp"); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); return Sel; } @@ -895,7 +900,7 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. 
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || @@ -908,10 +913,10 @@ struct Exp2Opt : public LibCallOptimization { Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); } if (LdExpArg) { @@ -946,7 +951,7 @@ struct Exp2Opt : public LibCallOptimization { struct UnaryDoubleFPOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) return 0; @@ -973,7 +978,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { struct FFSOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || @@ -996,10 +1001,10 @@ struct FFSOpt : public LibCallOptimization { Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, Op, "cttz"); - V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); - V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); - Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); return B.CreateSelect(Cond, V, B.getInt32(0)); } }; @@ -1009,7 +1014,7 @@ struct FFSOpt : public LibCallOptimization { struct IsDigitOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) @@ -1028,7 +1033,7 @@ struct IsDigitOpt : public LibCallOptimization { struct IsAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) @@ -1046,7 +1051,7 @@ struct IsAsciiOpt : public LibCallOptimization { struct AbsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. 
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) @@ -1067,7 +1072,7 @@ struct AbsOpt : public LibCallOptimization { struct ToAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) @@ -1147,7 +1152,7 @@ struct PrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) @@ -1241,7 +1246,7 @@ struct SPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) @@ -1272,7 +1277,7 @@ struct SPrintFOpt : public LibCallOptimization { struct FWriteOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require a pointer, an integer, an integer, a pointer, returning integer. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || !FT->getParamType(2)->isIntegerTy() || @@ -1310,7 +1315,7 @@ struct FPutsOpt : public LibCallOptimization { if (!TD) return 0; // Require two pointers. Also, we can't optimize if return value is used. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) @@ -1379,7 +1384,7 @@ struct FPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed paramters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) @@ -1410,7 +1415,7 @@ struct FPrintFOpt : public LibCallOptimization { struct PutsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. 
- const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) @@ -1685,7 +1690,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - const FunctionType *FTy = F.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); StringRef Name = F.getName(); switch (Name[0]) { diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp index 705f442..c83f56c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp @@ -153,9 +153,13 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, SmallPtrSet<Instruction *, 8> &Stores) { - if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { - if (L->isVolatile()) return false; + if (Inst->mayWriteToMemory()) { + Stores.insert(Inst); + return false; + } + + if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { AliasAnalysis::Location Loc = AA->getLocation(L); for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), E = Stores.end(); I != E; ++I) @@ -163,11 +167,6 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, return false; } - if (Inst->mayWriteToMemory()) { - Stores.insert(Inst); - return false; - } - if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst)) return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp deleted file mode 100644 index 9dd83c0..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp +++ /dev/null @@ -1,373 +0,0 @@ -//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass performs a limited form of tail duplication, intended to simplify -// CFGs by removing some unconditional branches. This pass is necessary to -// straighten out loops created by the C front-end, but also is capable of -// making other code nicer. After this pass is run, the CFG simplify pass -// should be run to clean up the mess. -// -// This pass could be enhanced in the future to use profile information to be -// more aggressive. 
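As a source-level illustration of the transformation the deleted pass performed (a hypothetical editorial sketch, not code from the file): cloning a small shared tail into each predecessor removes the unconditional branch and the merge point.

    #include <cstdio>

    // Before: both arms branch unconditionally to a shared tail.
    static void before_taildup(bool a, int v) {
      int p;
      if (a) p = v + 1;
      else   p = v - 1;
      std::printf("%d\n", p);            // the shared tail block
    }

    // After: the tail is duplicated into each arm; no merge block remains.
    static void after_taildup(bool a, int v) {
      if (a) std::printf("%d\n", v + 1);
      else   std::printf("%d\n", v - 1);
    }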
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "tailduplicate" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constant.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" -#include <map> -using namespace llvm; - -STATISTIC(NumEliminated, "Number of unconditional branches eliminated"); - -static cl::opt<unsigned> -TailDupThreshold("taildup-threshold", - cl::desc("Max block size to tail duplicate"), - cl::init(1), cl::Hidden); - -namespace { - class TailDup : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(ID) { - initializeTailDupPass(*PassRegistry::getPassRegistry()); - } - - private: - inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); - inline void eliminateUnconditionalBranch(BranchInst *BI); - SmallPtrSet<BasicBlock*, 4> CycleDetector; - }; -} - -char TailDup::ID = 0; -INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false) - -// Public interface to the Tail Duplication pass -FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } - -/// runOnFunction - Top level algorithm - Loop over each unconditional branch in -/// the function, eliminating it if it looks attractive enough. CycleDetector -/// prevents infinite loops by checking that we aren't redirecting a branch to -/// a place it already pointed to earlier; see PR 2323. -bool TailDup::runOnFunction(Function &F) { - bool Changed = false; - CycleDetector.clear(); - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - if (shouldEliminateUnconditionalBranch(I->getTerminator(), - TailDupThreshold)) { - eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator())); - Changed = true; - } else { - ++I; - CycleDetector.clear(); - } - } - return Changed; -} - -/// shouldEliminateUnconditionalBranch - Return true if this branch looks -/// attractive to eliminate. We eliminate the branch if the destination basic -/// block has <= 5 instructions in it, not counting PHI nodes. In practice, -/// since one of these is a terminator instruction, this means that we will add -/// up to 4 instructions to the new block. -/// -/// We don't count PHI nodes in the count since they will be removed when the -/// contents of the block are copied over. -/// -bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI, - unsigned Threshold) { - BranchInst *BI = dyn_cast<BranchInst>(TI); - if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch! - - BasicBlock *Dest = BI->getSuccessor(0); - if (Dest == BI->getParent()) return false; // Do not loop infinitely! - - // Do not inline a block if we will just get another branch to the same block! - TerminatorInst *DTI = Dest->getTerminator(); - if (BranchInst *DBI = dyn_cast<BranchInst>(DTI)) - if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest) - return false; // Do not loop infinitely! - - // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack, - // because doing so would require breaking critical edges. 
This should be - // fixed eventually. - if (!DTI->use_empty()) - return false; - - // Do not bother with blocks with only a single predecessor: simplify - // CFG will fold these two blocks together! - pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest); - ++PI; - if (PI == PE) return false; // Exactly one predecessor! - - BasicBlock::iterator I = Dest->getFirstNonPHI(); - - for (unsigned Size = 0; I != Dest->end(); ++I) { - if (Size == Threshold) return false; // The block is too large. - - // Don't tail duplicate call instructions. They are very large compared to - // other instructions. - if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; - - // Also alloca and malloc. - if (isa<AllocaInst>(I)) return false; - - // Some vector instructions can expand into a number of instructions. - if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || - isa<InsertElementInst>(I)) return false; - - // Only count instructions that are not debugger intrinsics. - if (!isa<DbgInfoIntrinsic>(I)) ++Size; - } - - // Do not tail duplicate a block that has thousands of successors into a block - // with a single successor if the block has many other predecessors. This can - // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in - // cases that have a large number of indirect gotos. - unsigned NumSuccs = DTI->getNumSuccessors(); - if (NumSuccs > 8) { - unsigned TooMany = 128; - if (NumSuccs >= TooMany) return false; - TooMany = TooMany/NumSuccs; - for (; PI != PE; ++PI) - if (TooMany-- == 0) return false; - } - - // If this unconditional branch is a fall-through, be careful about - // tail duplicating it. In particular, we don't want to taildup it if the - // original block will still be there after taildup is completed: doing so - // would eliminate the fall-through, requiring unconditional branches. - Function::iterator DestI = Dest; - if (&*--DestI == BI->getParent()) { - // The uncond branch is a fall-through. Tail duplication of the block is - // will eliminate the fall-through-ness and end up cloning the terminator - // at the end of the Dest block. Since the original Dest block will - // continue to exist, this means that one or the other will not be able to - // fall through. One typical example that this helps with is code like: - // if (a) - // foo(); - // if (b) - // foo(); - // Cloning the 'if b' block into the end of the first foo block is messy. - - // The messy case is when the fall-through block falls through to other - // blocks. This is what we would be preventing if we cloned the block. - DestI = Dest; - if (++DestI != Dest->getParent()->end()) { - BasicBlock *DestSucc = DestI; - // If any of Dest's successors are fall-throughs, don't do this xform. - for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest); - SI != SE; ++SI) - if (*SI == DestSucc) - return false; - } - } - - // Finally, check that we haven't redirected to this target block earlier; - // there are cases where we loop forever if we don't check this (PR 2323). - if (!CycleDetector.insert(Dest)) - return false; - - return true; -} - -/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to -/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we -/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and -/// DstBlock, return it. -static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, - BasicBlock *DstBlock) { - // SrcBlock must have a single predecessor. 
- pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock); - if (PI == PE || ++PI != PE) return 0; - - BasicBlock *SrcPred = *pred_begin(SrcBlock); - - // Look at the predecessors of DstBlock. One of them will be SrcBlock. If - // there is only one other pred, get it, otherwise we can't handle it. - PI = pred_begin(DstBlock); PE = pred_end(DstBlock); - BasicBlock *DstOtherPred = 0; - BasicBlock *P = *PI; - if (P == SrcBlock) { - if (++PI == PE) return 0; - DstOtherPred = *PI; - if (++PI != PE) return 0; - } else { - DstOtherPred = P; - if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; - } - - // We can handle two situations here: "if then" and "if then else" blocks. An - // 'if then' situation is just where DstOtherPred == SrcPred. - if (DstOtherPred == SrcPred) - return SrcPred; - - // Check to see if we have an "if then else" situation, which means that - // DstOtherPred will have a single predecessor and it will be SrcPred. - PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred); - if (PI != PE && *PI == SrcPred) { - if (++PI != PE) return 0; // Not a single pred. - return SrcPred; // Otherwise, it's an "if then" situation. Return the if. - } - - // Otherwise, this is something we can't handle. - return 0; -} - - -/// eliminateUnconditionalBranch - Clone the instructions from the destination -/// block into the source block, eliminating the specified unconditional branch. -/// If the destination block defines values used by successors of the dest -/// block, we may need to insert PHI nodes. -/// -void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { - BasicBlock *SourceBlock = Branch->getParent(); - BasicBlock *DestBlock = Branch->getSuccessor(0); - assert(SourceBlock != DestBlock && "Our predicate is broken!"); - - DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName() - << "]: Eliminating branch: " << *Branch); - - // See if we can avoid duplicating code by moving it up to a dominator of both - // blocks. - if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { - DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n"); - - // If there are non-phi instructions in DestBlock that have no operands - // defined in DestBlock, and if the instruction has no side effects, we can - // move the instruction to DomBlock instead of duplicating it. - BasicBlock::iterator BBI = DestBlock->getFirstNonPHI(); - while (!isa<TerminatorInst>(BBI)) { - Instruction *I = BBI++; - - bool CanHoist = I->isSafeToSpeculativelyExecute() && - !I->mayReadFromMemory(); - if (CanHoist) { - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) - if (OpI->getParent() == DestBlock || - (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) { - CanHoist = false; - break; - } - if (CanHoist) { - // Remove from DestBlock, move right before the term in DomBlock. - DestBlock->getInstList().remove(I); - DomBlock->getInstList().insert(DomBlock->getTerminator(), I); - DEBUG(dbgs() << "Hoisted: " << *I); - } - } - } - } - - // Tail duplication can not update SSA properties correctly if the values - // defined in the duplicated tail are used outside of the tail itself. For - // this reason, we spill all values that are used outside of the tail to the - // stack. - for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I) - if (I->isUsedOutsideOfBlock(DestBlock)) { - // We found a use outside of the tail. 
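The cloning loop further below keeps an old-to-new value map and rewrites every cloned operand through it. A minimal generic sketch of that remapping step (editorial, assumed types, not the LLVM code):

    #include <map>

    struct Node { Node *operand; };

    // If the operand refers to a value that was cloned, point it at the clone.
    static void remapOperand(Node &cloned, const std::map<Node*, Node*> &valueMap) {
      std::map<Node*, Node*>::const_iterator it = valueMap.find(cloned.operand);
      if (it != valueMap.end())
        cloned.operand = it->second;
    }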
Create a new stack slot to - // break this inter-block usage pattern. - DemoteRegToStack(*I); - } - - // We are going to have to map operands from the original block B to the new - // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI - // nodes also define part of this mapping. Loop over these PHI nodes, adding - // them to our mapping. - // - std::map<Value*, Value*> ValueMapping; - - BasicBlock::iterator BI = DestBlock->begin(); - bool HadPHINodes = isa<PHINode>(BI); - for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock); - - // Clone the non-phi instructions of the dest block into the source block, - // keeping track of the mapping... - // - for (; BI != DestBlock->end(); ++BI) { - Instruction *New = BI->clone(); - New->setName(BI->getName()); - SourceBlock->getInstList().push_back(New); - ValueMapping[BI] = New; - } - - // Now that we have built the mapping information and cloned all of the - // instructions (giving us a new terminator, among other things), walk the new - // instructions, rewriting references of old instructions to use new - // instructions. - // - BI = Branch; ++BI; // Get an iterator to the first new instruction - for (; BI != SourceBlock->end(); ++BI) - for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { - std::map<Value*, Value*>::const_iterator I = - ValueMapping.find(BI->getOperand(i)); - if (I != ValueMapping.end()) - BI->setOperand(i, I->second); - } - - // Next we check to see if any of the successors of DestBlock had PHI nodes. - // If so, we need to add entries to the PHI nodes for SourceBlock now. - for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock); - SI != SE; ++SI) { - BasicBlock *Succ = *SI; - for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) { - PHINode *PN = cast<PHINode>(PNI); - // Ok, we have a PHI node. Figure out what the incoming value was for the - // DestBlock. - Value *IV = PN->getIncomingValueForBlock(DestBlock); - - // Remap the value if necessary... - std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV); - if (I != ValueMapping.end()) - IV = I->second; - PN->addIncoming(IV, SourceBlock); - } - } - - // Next, remove the old branch instruction, and any PHI node entries that we - // had. - BI = Branch; ++BI; // Get an iterator to the first new instruction - DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes... - SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch... - - // Final step: now that we have finished everything up, walk the cloned - // instructions one last time, constant propagating and DCE'ing them, because - // they may not be needed anymore. - // - if (HadPHINodes) { - while (BI != SourceBlock->end()) { - Instruction *Inst = BI++; - if (isInstructionTriviallyDead(Inst)) - Inst->eraseFromParent(); - else if (Value *V = SimplifyInstruction(Inst)) { - Inst->replaceAllUsesWith(V); - Inst->eraseFromParent(); - } - } - } - - ++NumEliminated; // We just killed a branch! 
-} diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp index be7bed1..8e5a1eb 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -222,7 +222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, const TargetData *TD = TLI.getTargetData(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); @@ -557,7 +557,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, Value *Address = User->getOperand(OpNo); if (!Address->getType()->isPointerTy()) return false; - const Type *AddressAccessTy = + Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); // Do a match against the root of this address, ignoring profitability. This diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index b4f74f9..a7f9efd 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -287,7 +287,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { BasicBlock::iterator SplitIt = SplitPt; - while (isa<PHINode>(SplitIt)) + while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); @@ -299,138 +299,114 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { // Old dominates New. New node dominates all other nodes dominated by Old. - DomTreeNode *OldNode = DT->getNode(Old); - std::vector<DomTreeNode *> Children; - for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) - Children.push_back(*I); + if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); + } } return New; } +/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA +/// analysis information. +static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + Pass *P, bool &HasLoopExit) { + if (!P) return; -/// SplitBlockPredecessors - This method transforms BB by introducing a new -/// basic block into the function, and moving some of the predecessors of BB to -/// be predecessors of the new block. The new predecessors are indicated by the -/// Preds array, which has NumPreds elements in it. The new block is given a -/// suffix of 'Suffix'. -/// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// LoopInfo, and LCCSA but no other analyses. 
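Several hunks in this file replace pointer-plus-count parameter pairs with ArrayRef views. A rough, simplified sketch of that kind of non-owning view (editorial; not llvm::ArrayRef itself):

    #include <cstddef>
    #include <vector>

    template <typename T>
    class array_view {                    // simplified stand-in for llvm::ArrayRef<T>
      const T *Data;
      std::size_t Length;
    public:
      array_view(const T *data, std::size_t length) : Data(data), Length(length) {}
      array_view(const std::vector<T> &v)
        : Data(v.empty() ? 0 : &v[0]), Length(v.size()) {}
      const T *begin() const { return Data; }
      const T *end()   const { return Data + Length; }
      std::size_t size() const { return Length; }
      const T &operator[](std::size_t i) const { return Data[i]; }
    };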
In particular, it does not -/// preserve LoopSimplify (because it's complicated to handle the case where one -/// of the edges being split is an exit of a loop with other exits). -/// -BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { - // Create new basic block, insert right before the original block. - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, - BB->getParent(), BB); - - // The new block unconditionally branches to the old block. - BranchInst *BI = BranchInst::Create(BB, NewBB); - - LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; - Loop *L = LI ? LI->getLoopFor(BB) : 0; - bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + Loop *L = LI ? LI->getLoopFor(OldBB) : 0; - // Move the edges from Preds to point to NewBB instead of BB. - // While here, if we need to preserve loop analyses, collect - // some information about how this split will affect loops. - bool HasLoopExit = false; + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; - for (unsigned i = 0; i != NumPreds; ++i) { - // This is slightly more strict than necessary; the minimum requirement - // is that there be no more than one indirectbr branching to BB. And - // all BlockAddress uses would need to be updated. - assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); - - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - - if (LI) { - // If we need to preserve LCSSA, determine if any of - // the preds is a loop exit. + if (LI) { + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Preds[i])) - if (!PL->contains(BB)) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) HasLoopExit = true; - // If we need to preserve LoopInfo, note whether any of the - // preds crosses an interesting loop boundary. - if (L) { - if (L->contains(Preds[i])) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; } } // Update dominator tree if available. - DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); if (DT) DT->splitBlock(NewBB); - // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI - // node becomes an incoming value for BB's phi node. However, if the Preds - // list is empty, we need to insert dummy entries into the PHI nodes in BB to - // account for the newly created predecessor. - if (NumPreds == 0) { - // Insert dummy values as the incoming value. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); - return NewBB; + if (!L) return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). 
To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. + if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); } +} +/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming +/// from NewBB. This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock*> Preds, BranchInst *BI, + Pass *P, bool HasLoopExit) { + // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; - - if (L) { - if (IsLoopEntry) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). To find this, examine each of the predecessors and - // determine which loops enclose them, and select the most-nested loop - // which contains the loop containing the block being split. - Loop *InnermostPredLoop = 0; - for (unsigned i = 0; i != NumPreds; ++i) - if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { - // Seek a loop which actually contains the block being split (to - // avoid adjacent loops). - while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - // Select the most-nested of these loops which contains the block. - if (PredLoop && - PredLoop->contains(BB) && - (!InnermostPredLoop || - InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) - InnermostPredLoop = PredLoop; - } - if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } else { - L->addBasicBlockToLoop(NewBB, LI->getBase()); - if (SplitMakesNewLoopHeader) - L->moveToHeader(NewBB); - } - } - - // Otherwise, create a new PHI node in NewBB for each PHI node in BB. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); - + // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1; i != NumPreds; ++i) + for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; @@ -441,31 +417,191 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. 
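The check described just above short-circuits PHI creation when every split-off predecessor contributes the same incoming value. A minimal generic version of that test (editorial, assumed container types):

    #include <cstddef>
    #include <vector>

    template <typename V>
    static bool allIncomingEqual(const std::vector<V> &incoming) {
      for (std::size_t i = 1; i < incoming.size(); ++i)
        if (!(incoming[i] == incoming[0]))
          return false;
      return true;                        // also true for zero or one incoming value
    }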
- for (unsigned i = 0; i != NumPreds; ++i) + for (unsigned i = 0, e = Preds.size(); i != e; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = - PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } + InVal = NewPHI; } - + // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } +} + +/// SplitBlockPredecessors - This method transforms BB by introducing a new +/// basic block into the function, and moving some of the predecessors of BB to +/// be predecessors of the new block. The new predecessors are indicated by the +/// Preds array, which has NumPreds elements in it. The new block is given a +/// suffix of 'Suffix'. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// LoopInfo, and LCCSA but no other analyses. In particular, it does not +/// preserve LoopSimplify (because it's complicated to handle the case where one +/// of the edges being split is an exit of a loop with other exits). +/// +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + BasicBlock *const *Preds, + unsigned NumPreds, const char *Suffix, + Pass *P) { + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, + BB->getParent(), BB); + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); + + // Move the edges from Preds to point to NewBB instead of BB. + for (unsigned i = 0; i != NumPreds; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (NumPreds == 0) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) + cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + return NewBB; + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), + P, HasLoopExit); + + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, + P, HasLoopExit); return NewBB; } +/// SplitLandingPadPredecessors - This method transforms the landing pad, +/// OrigBB, by introducing two new basic blocks into the function. One of those +/// new basic blocks gets the predecessors listed in Preds. 
The other basic +/// block gets the remaining predecessors of OrigBB. The landingpad instruction +/// OrigBB is clone into both of the new basic blocks. The new blocks are given +/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, +/// it does not preserve LoopSimplify (because it's complicated to handle the +/// case where one of the edges being split is an exit of a loop with other +/// exits). +/// +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock*> Preds, + const char *Suffix1, const char *Suffix2, + Pass *P, + SmallVectorImpl<BasicBlock*> &NewBBs) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. + BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = 0; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + + // Move the remaining edges from OrigBB to point to NewBB2. + for (SmallVectorImpl<BasicBlock*>::iterator + i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) + (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2. 
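SplitLandingPadPredecessors routes the listed Preds to the first new block and every remaining predecessor to the second. A small generic sketch of that partitioning decision (editorial, assumed types):

    #include <cstddef>
    #include <set>
    #include <vector>

    static void partitionPreds(const std::vector<int> &allPreds,
                               const std::vector<int> &chosen,
                               std::vector<int> &toFirstBlock,
                               std::vector<int> &toSecondBlock) {
      std::set<int> chosenSet(chosen.begin(), chosen.end());
      for (std::size_t i = 0; i != allPreds.size(); ++i) {
        if (chosenSet.count(allPreds[i]))
          toFirstBlock.push_back(allPreds[i]);
        else
          toSecondBlock.push_back(allPreds[i]);
      }
    }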
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); + + // Create a PHI node for the two cloned landingpad instructions. + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + /// FindFunctionBackedges - Analyze the specified function to find all of the /// loop backedges in the function and return them. This is a relatively cheap /// (compared to computing dominators and loop info) analysis. diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 92ce500..c052910 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -102,7 +102,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, ++I; // Skip one edge due to the incoming arc from TI. if (!AllowIdenticalEdges) return I != E; - + // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { @@ -155,10 +155,10 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the -/// specified successor will be merged into the same critical edge block. -/// This is most commonly interesting with switch instructions, which may +/// specified successor will be merged into the same critical edge block. +/// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that -/// dest go to one block instead of each going to a different block, but isn't +/// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an @@ -167,15 +167,20 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, - Pass *P, bool MergeIdenticalEdges) { + Pass *P, bool MergeIdenticalEdges, + bool DontDeleteUselessPhis) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; - + assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); - + BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isLandingPad()) return 0; + // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." 
+ DestBB->getName() + "_crit_edge"); @@ -190,7 +195,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); - + // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { @@ -207,35 +212,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) - BBIdx = PN->getBasicBlockIndex(TIBB); + BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; - + // Remove an entry for TIBB from DestBB phi nodes. - DestBB->removePredecessor(TIBB); - + DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } - - + + // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; - + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - + // If we have nothing to update, just return. if (DT == 0 && LI == 0 && PI == 0) return NewBB; @@ -263,7 +268,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } bool NewBBDominatesDestBB = true; - + // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); @@ -274,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; - + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); @@ -285,7 +290,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } OtherPreds.clear(); } - + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { @@ -337,6 +342,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // For each unique exit block... + // FIXME: This code is functionally equivalent to the corresponding + // loop in LoopSimplify. SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { @@ -348,10 +355,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; - if (TIL->contains(P)) + if (TIL->contains(P)) { + if (isa<IndirectBrInst>(P->getTerminator())) { + Preds.clear(); + break; + } Preds.push_back(P); - else + } else { HasPredOutsideOfLoop = true; + } } // If there are any preds not in the loop, we'll need to split // the edges. 
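For orientation, the property this function splits away: an edge is critical when its source block has more than one successor and its destination has more than one predecessor. A generic sketch of that predicate (editorial, assumed CFG representation):

    #include <cstddef>
    #include <vector>

    struct Block {
      std::vector<std::size_t> succs;
      std::vector<std::size_t> preds;
    };

    static bool isCriticalEdgeSketch(const std::vector<Block> &cfg,
                                     std::size_t from, std::size_t to) {
      return cfg[from].succs.size() > 1 && cfg[to].preds.size() > 1;
    }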
The Preds.empty() check is needed because a block diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 14bb17f..4b5f45b 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,8 +58,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); - const Type *I32Ty = B.getInt32Ty(); + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), @@ -102,7 +102,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); + Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -120,7 +120,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); + Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); @@ -361,7 +361,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { this->CI = CI; Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); IRBuilder<> B(CI); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 6ea831f..cf21f1e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -331,15 +331,10 @@ ConstantFoldMappedInstruction(const Instruction *I) { TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) - if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), - CE); - - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], - Ops.size(), TD); + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Ops[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index a08fa35..a0e027b 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -50,10 +50,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I != E; ++I) { GlobalVariable *GV = new GlobalVariable(*New, 
I->getType()->getElementType(), - false, - GlobalValue::ExternalLinkage, 0, - I->getName()); - GV->setAlignment(I->getAlignment()); + I->isConstant(), I->getLinkage(), + (Constant*) 0, I->getName(), + (GlobalVariable*) 0, + I->isThreadLocal(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(I); VMap[I] = GV; } @@ -61,16 +63,19 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = Function::Create(cast<FunctionType>(I->getType()->getElementType()), - GlobalValue::ExternalLinkage, I->getName(), New); + I->getLinkage(), I->getName(), New); NF->copyAttributesFrom(I); VMap[I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); - I != E; ++I) - VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, - I->getName(), NULL, New); + I != E; ++I) { + GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), + I->getName(), NULL, New); + GA->copyAttributesFrom(I); + VMap[I] = GA; + } // Now that all of the things that global variable initializer can refer to // have been created, loop through and copy the global variable referrers @@ -81,9 +86,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); - GV->setLinkage(I->getLinkage()); - GV->setThreadLocal(I->isThreadLocal()); - GV->setConstant(I->isConstant()); } // Similarly, copy over function bodies now... @@ -101,15 +103,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
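The CloneModule hunks above copy a global's properties from the original at creation time via copyAttributesFrom rather than patching linkage, thread-locality, and constness afterwards. A minimal sketch of that pattern (editorial, assumed types, not the LLVM API):

    struct GlobalAttrs {
      int  linkage;
      bool threadLocal;
      bool constant;
    };

    struct Global {
      GlobalAttrs attrs;
      void copyAttributesFrom(const Global &other) { attrs = other.attrs; }
    };

    // One call at creation time replaces the piecemeal setters used before.
    static Global cloneDeclaration(const Global &original) {
      Global clone;
      clone.copyAttributesFrom(original);
      return clone;
    }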
CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); } - - F->setLinkage(I->getLinkage()); } // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); - GA->setLinkage(I->getLinkage()); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); } diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0813523..5f47ebb 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -50,14 +50,14 @@ namespace { DominatorTree* DT; bool AggregateArgs; unsigned NumExitBlocks; - const Type *RetTy; + Type *RetTy; public: CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {} - Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code); + Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code); - bool isEligible(const std::vector<BasicBlock*> &code); + bool isEligible(ArrayRef<BasicBlock*> code); private: /// definedInRegion - Return true if the specified value is defined in the @@ -290,7 +290,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, paramTy.clear(); paramTy.push_back(StructPtr); } - const FunctionType *funcType = + FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); // Create the new function @@ -317,8 +317,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = - GetElementPtrInst::Create(AI, Idx, Idx+2, - "gep_" + inputs[i]->getName(), TI); + GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -420,7 +419,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); GetElementPtrInst *GEP = - GetElementPtrInst::Create(Struct, Idx, Idx + 2, + GetElementPtrInst::Create(Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); StoreInst *SI = new StoreInst(StructValues[i], GEP); @@ -446,7 +445,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP - = GetElementPtrInst::Create(Struct, Idx, Idx + 2, + = GetElementPtrInst::Create(Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; @@ -561,7 +560,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); GetElementPtrInst *GEP = - GetElementPtrInst::Create(OAI, Idx, Idx + 2, + GetElementPtrInst::Create(OAI, Idx, "gep_" + outputs[out]->getName(), NTRet); new StoreInst(outputs[out], GEP, NTRet); @@ -580,7 +579,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } // Now that we've done the deed, simplify the switch instruction. 
- const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { case 0: // There are no successors (the block containing the switch itself), which @@ -655,7 +654,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { /// computed result back into memory. /// Function *CodeExtractor:: -ExtractCodeRegion(const std::vector<BasicBlock*> &code) { +ExtractCodeRegion(ArrayRef<BasicBlock*> code) { if (!isEligible(code)) return 0; @@ -755,9 +754,13 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { return newFunction; } -bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) { +bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) { + // Deny a single basic block that's a landing pad block. + if (code.size() == 1 && code[0]->isLandingPad()) + return false; + // Deny code region if it contains allocas or vastarts. - for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end(); + for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e=code.end(); BB != e; ++BB) for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end(); I != Ie; ++I) @@ -771,25 +774,23 @@ bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) { } -/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new -/// function +/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new +/// function. /// Function* llvm::ExtractCodeRegion(DominatorTree &DT, - const std::vector<BasicBlock*> &code, + ArrayRef<BasicBlock*> code, bool AggregateArgs) { return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code); } -/// ExtractBasicBlock - slurp a natural loop into a brand new function +/// ExtractLoop - Slurp a natural loop into a brand new function. /// Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) { return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks()); } -/// ExtractBasicBlock - slurp a basic block into a brand new function +/// ExtractBasicBlock - Slurp a basic block into a brand new function. /// -Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) { - std::vector<BasicBlock*> Blocks; - Blocks.push_back(BB); - return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks); +Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){ + return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs); } diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index d5b382e..5464dbc 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -45,6 +45,9 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { return InlineFunction(CallSite(II), IFI); } +// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is +// turned on. + /// [LIBUNWIND] Look for an llvm.eh.exception call in the given block. static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) { for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) { @@ -250,20 +253,32 @@ namespace { PHINode *InnerSelectorPHI; SmallVector<Value*, 8> UnwindDestPHIValues; + // FIXME: New EH - These will replace the analogous ones above. + BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; //< Destination for the callee's resume. 
+ LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. + PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. + public: - InvokeInliningInfo(InvokeInst *II) : - OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), - InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) { - - // If there are PHI nodes in the unwind destination block, we - // need to keep track of which values came into them from the - // invoke before removing the edge from this block. - llvm::BasicBlock *invokeBB = II->getParent(); - for (BasicBlock::iterator I = OuterUnwindDest->begin(); - isa<PHINode>(I); ++I) { + InvokeInliningInfo(InvokeInst *II) + : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), + InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0), + OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + CallerLPad(0), InnerEHValuesPHI(0) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (; isa<PHINode>(I); ++I) { // Save the value to use for this edge. - PHINode *phi = cast<PHINode>(I); - UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB)); + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // FIXME: With the new EH, this if/dyn_cast should be a 'cast'. + if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) { + CallerLPad = LPI; } } @@ -281,11 +296,23 @@ namespace { BasicBlock *getInnerUnwindDest(); + // FIXME: New EH - Rename when new EH is turned on. + BasicBlock *getInnerUnwindDestNewEH(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + bool forwardEHResume(CallInst *call, BasicBlock *src); - /// Add incoming-PHI values to the unwind destination block for - /// the given basic block, using the values for the original - /// invoke's source block. + /// forwardResume - Forward the 'resume' instruction to the caller's landing + /// pad block. When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI); + + /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind + /// destination block for the given basic block, using the values for the + /// original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterUnwindDest); } @@ -300,7 +327,7 @@ namespace { }; } -/// Get or create a target for the branch out of rewritten calls to +/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to /// llvm.eh.resume. BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { if (InnerUnwindDest) return InnerUnwindDest; @@ -404,6 +431,60 @@ bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { return true; } +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { + // FIXME: New EH - rename this function when new EH is turned on. + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. 
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +/// forwardResume - Forward the 'resume' instruction to the caller's landing pad +/// block. When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void InvokeInliningInfo::forwardResume(ResumeInst *RI) { + BasicBlock *Dest = getInnerUnwindDestNewEH(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); +} + /// [LIBUNWIND] Check whether this selector is "only cleanups": /// call i32 @llvm.eh.selector(blah, blah, i32 0) static bool isCleanupOnlySelector(EHSelectorInst *selector) { @@ -421,9 +502,19 @@ static bool isCleanupOnlySelector(EHSelectorInst *selector) { /// Returns true to indicate that the next block should be skipped. static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInliningInfo &Invoke) { + LandingPadInst *LPI = Invoke.getLandingPadInst(); + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - + + if (LPI) // FIXME: New EH - This won't be NULL in the new EH. + if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { + unsigned NumClauses = LPI->getNumClauses(); + L->reserveClauses(NumClauses); + for (unsigned i = 0; i != NumClauses; ++i) + L->addClause(LPI->getClause(i)); + } + // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); @@ -557,6 +648,10 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // there is now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); } + + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + Invoke.forwardResume(RI); + } } // Now that everything is happy, we have one final detail. 
The PHI nodes in @@ -636,7 +731,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -726,7 +821,7 @@ static bool isUsedByLifetimeMarker(Value *V) { // hasLifetimeMarkers - Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { - const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); + Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); if (AI->getType() == Int8PtrTy) return isUsedByLifetimeMarker(AI); @@ -770,8 +865,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (!DL.isUnknown()) + if (!DL.isUnknown()) { BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx)); + } + } } } } @@ -822,6 +924,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + for (Function::const_iterator + I = Caller->begin(), E = Caller->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'isa' here should become go away once the new EH system is + // in place. + if (!isa<LandingPadInst>(BB->getFirstNonPHI())) + continue; + const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); + const Value *CallerPersFn = LP->getPersonalityFn(); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + for (Function::const_iterator + I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once + // the new EH system is in place. + if (const LandingPadInst *LP = + dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) + if (CallerPersFn != LP->getPersonalityFn()) + return false; + break; + } + + break; + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // @@ -1090,7 +1226,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. 
- const Type *RTy = CalledFunc->getReturnType(); + Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 0f6d9ae..7034feb 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -227,13 +226,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + // We don't want the landingpad instruction removed by anything this general. + if (isa<LandingPadInst>(I)) + return false; + // We don't want debug info removed by anything this general, unless // debug info is empty. if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { - if (DDI->getAddress()) + if (DDI->getAddress()) return false; return true; - } + } if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { if (DVI->getValue()) return false; @@ -244,10 +247,16 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { // Special case intrinsics that "may have side effects" but can be deleted // when dead. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { // Safe to delete llvm.stacksave if dead. if (II->getIntrinsicID() == Intrinsic::stacksave) return true; + + // Lifetime intrinsics are dead when their right-hand is undef. + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + return isa<UndefValue>(II->getArgOperand(1)); + } return false; } @@ -712,10 +721,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign) { + unsigned PrefAlign, const TargetData *TD) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. + if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + return Align; // If there is a requested alignment and if this is an alloca, round up. if (AI->getAlignment() >= PrefAlign) return AI->getAlignment(); @@ -766,7 +779,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, Align = std::min(Align, +Value::MaximumAlignment); if (PrefAlign > Align) - Align = enforceKnownAlignment(V, Align, PrefAlign); + Align = enforceKnownAlignment(V, Align, PrefAlign, TD); // We don't need to make any adjustment. return Align; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index e79fb5a..cbd54a8 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -213,7 +213,7 @@ ReprocessLoop: // predecessors from outside of the loop, split the edge now. 
SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); - + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), @@ -325,6 +325,14 @@ ReprocessLoop: DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); + // If any reachable control flow within this loop has changed, notify + // ScalarEvolution. Currently assume the parent loop doesn't change + // (spliting edges doesn't count). If blocks, CFG edges, or other values + // in the parent loop change, then we need call to forgetLoop() for the + // parent instead. + if (SE) + SE->forgetLoop(L); + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -402,13 +410,24 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + BasicBlock *NewExitBB = 0; + + if (Exit->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], + LoopBlocks.size()), + ".loopexit", ".nonloopexit", + this, NewBBs); + NewExitBB = NewBBs[0]; + } else { + NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], + LoopBlocks.size(), ".loopexit", + this); + } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewBB->getName() << "\n"); - return NewBB; + << NewExitBB->getName() << "\n"); + return NewExitBB; } /// AddBlockAndPredsToSet - Add the specified block, and all of its @@ -467,23 +486,23 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, if (&*BBI == SplitPreds[i]) return; } - + // If it isn't already after an outside block, move it after one. This is // always good as it makes the uncond branch from the outside block into a // fall-through. - + // Figure out *which* outside block to put this after. Prefer an outside // block that neighbors a BB actually in the loop. BasicBlock *FoundBB = 0; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && + if (++BBI != NewBB->getParent()->end() && L->contains(BBI)) { FoundBB = SplitPreds[i]; break; } } - + // If our heuristic for a *good* bb to place this after doesn't find // anything, just pick something. It's likely better than leaving it within // the loop. @@ -544,7 +563,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); - + // Create the new outer loop. Loop *NewOuter = new Loop(); @@ -735,6 +754,7 @@ void LoopSimplify::verifyAnalysis() const { } assert(HasIndBrPred && "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; } // Indirectbr can interfere with exit block canonicalization. 
@@ -742,12 +762,15 @@ void LoopSimplify::verifyAnalysis() const { bool HasIndBrExiting = false; SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { HasIndBrExiting = true; break; } + } + assert(HasIndBrExiting && "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; } } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 6772511..62e4fa2 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -11,9 +11,6 @@ // actual pass or policy, but provides a single function to perform loop // unrolling. // -// It works best when loops have been canonicalized by the -indvars pass, -// allowing it to determine the trip counts of loops easily. -// // The process of unrolling can produce extraneous basic blocks linked with // unconditional branches. This will be corrected in the future. // @@ -24,6 +21,7 @@ #include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" @@ -31,6 +29,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; // TODO: Should these be here or in LoopUnroll? @@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I, /// only has one predecessor, and that predecessor only has one successor. /// The LoopInfo Analysis that is passed will be kept consistent. /// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, + LPPassManager *LPM) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { std::string OldName = BB->getName(); // Erase basic block from the function... + + // ScalarEvolution holds references to loop exit blocks. + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } LI->removeBlock(BB); BB->eraseFromParent(); @@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. Note that UnrollLoop assumes that the loop counter test +/// terminates LatchBlock in order to remove unnecesssary instances of the +/// test. 
In other words, control may exit the loop prior to TripCount +/// iterations via an early branch, but control may not exit the loop from the +/// LatchBlock's terminator prior to TripCount iterations. +/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. -bool llvm::UnrollLoop(Loop *L, unsigned Count, - LoopInfo *LI, LPPassManager *LPM) { +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - + if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } - + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << @@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) SE->forgetLoop(L); - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - // Find trip multiple if count is not available - unsigned TripMultiple = 1; - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - OrigPHINode.push_back(PN); - if (Instruction *I = - dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) - if (L->contains(I)) - LastValueMap[I] = I; + OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; @@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, Headers.push_back(Header); Latches.push_back(LatchBlock); + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; - - for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), - E = LoopBlocks.end(); BB != E; ++BB) { + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." 
+ Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, L->addBasicBlockToLoop(New, LI->getBase()); - // Add phi entries for newly created values to all exit blocks except - // the successor of the latch block. The successor of the exit block will - // be updated specially after unrolling all the way. - if (*BB != LatchBlock) - for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; - ++SI) - if (!L->contains(*SI)) - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { - Value *Incoming = phi->getIncomingValueForBlock(*BB); - phi->addIncoming(Incoming, New); - } - + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); - if (*BB == LatchBlock) { + if (*BB == LatchBlock) Latches.push_back(New); - // Also, clear out the new latch's back edge so that it doesn't look - // like a new loop, so that it's amenable to being merged with adjacent - // blocks later on. - TerminatorInst *Term = New->getTerminator(); - assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); - assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); - Term->setSuccessor(!ContinueOnTrue, NULL); - } - NewBlocks.push_back(New); } - + // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } - - // The latch block exits the loop. If there are any PHI nodes in the - // successor blocks, update them to use the appropriate values computed as the - // last iteration of the loop. - if (Count != 1) { - BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); - for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock); - SI != SE; ++SI) { - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { - Value *InVal = PN->removeIncomingValue(LatchBlock, false); - // If this value was defined in the loop, take the value defined by the - // last iteration of the loop. - if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { - if (L->contains(InValI)) - InVal = LastValueMap[InVal]; - } - PN->addIncoming(InVal, LastIterationBB); - } - } - } - // Now, if we're doing complete unrolling, loop over the PHI nodes in the - // original block, setting them to their incoming values. - if (CompletelyUnroll) { - BasicBlock *Preheader = L->getLoopPreheader(); - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *PN = OrigPHINode[i]; + // Loop over the PHI nodes in the original block, setting incoming values. 
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } } // Now that all the basic blocks for the unrolled iterations are in place, @@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); @@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - + + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
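Note on the UnrollLoop change above: the utility no longer computes the trip count and trip multiple itself (those queries are removed from its body), so a caller is now expected to pass them in through the widened signature. The following is only a caller-side sketch, not part of the patch; it assumes the same Loop queries the removed lines used, and that Count, LI, and LPM are already available in the calling pass.

    // Sketch under the assumptions above: derive the values UnrollLoop
    // previously computed internally, then hand them to the new signature.
    unsigned TripCount = L->getSmallConstantTripCount();
    unsigned TripMultiple = 1;
    if (TripCount == 0)
      TripMultiple = L->getSmallConstantTripMultiple();
    if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
      return false; // unrolling was not possible for this loop
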
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index c1213fa..61ab3f6 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -58,7 +58,7 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { return false; LLVMContext &Context = CI->getContext(); - const Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int32Ty = Type::getInt32Ty(Context); unsigned caseNo = SI->findCaseValue(ExpectedValue); std::vector<Value *> Vec; @@ -105,7 +105,7 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { return false; LLVMContext &Context = CI->getContext(); - const Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int32Ty = Type::getInt32Ty(Context); bool Likely = ExpectedValue->isOne(); // If expect value is equal to 1 it means that we are more likely to take diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index f77d19d..c96c8fc 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -120,18 +120,18 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, // doInitialization - Make sure that there is a prototype for abort in the // current module. bool LowerInvoke::doInitialization(Module &M) { - const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); - JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty"); + JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty"); Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; JBLinkTy->setBody(Elts); - const Type *PtrJBList = PointerType::getUnqual(JBLinkTy); + Type *PtrJBList = PointerType::getUnqual(JBLinkTy); // Now that we've done that, insert the jmpbuf list head global, unless it // already exists. @@ -240,14 +240,14 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); new StoreInst(StackSaveRet, StackPtr, true, II); // volatile - BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); + BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); // nonvolatile. new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), InvokeNum, false, NI); - Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true, - II->getUnwindDest()->getFirstNonPHI() - ); + Instruction* StackPtrLoad = + new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstInsertionPt()); CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); // Add a switch case to our unwind block. @@ -305,7 +305,7 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - const Type *Ty = AI->getType(); + Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. 
@@ -406,6 +406,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { SmallVector<ReturnInst*,16> Returns; SmallVector<UnwindInst*,16> Unwinds; SmallVector<InvokeInst*,16> Invokes; + UnreachableInst* UnreachablePlaceholder = 0; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { @@ -455,8 +456,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], - "OldBuf", + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", EntryBB->getTerminator()); // Copy the JBListHead to the alloca. @@ -487,9 +487,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action - // for a standard call). + // for a standard call). We insert an unreachable instruction here and + // modify the block to jump to the correct unwinding pad later. BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB)); + UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); SwitchInst *CatchSwitch = @@ -502,8 +503,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { "setjmp.cont"); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], - "TheJmpBuf", + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", EntryBB->getTerminator()); JmpBufPtr = new BitCastInst(JmpBufPtr, Type::getInt8PtrTy(F.getContext()), @@ -557,8 +557,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Get a pointer to the jmpbuf and longjmp. Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; - Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf", - UnwindBlock); + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); Idx[0] = new BitCastInst(Idx[0], Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); @@ -580,6 +579,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Unwinds[i]->eraseFromParent(); } + // Replace the inserted unreachable with a branch to the unwind handler. + if (UnreachablePlaceholder) { + BranchInst::Create(UnwindHandler, UnreachablePlaceholder); + UnreachablePlaceholder->eraseFromParent(); + } + // Finally, for any returns from this function, if this function contains an // invoke, restore the old jmpbuf pointer to its input value. if (OldJmpBufPtr) { diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index ed733d3..686178c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -277,11 +277,11 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { BasicBlock *CurBlock = SI->getParent(); BasicBlock *OrigBlock = CurBlock; Function *F = CurBlock->getParent(); - Value *Val = SI->getOperand(0); // The value we are switching on... + Value *Val = SI->getCondition(); // The value we are switching on... 
BasicBlock* Default = SI->getDefaultDest(); // If there is only the default destination, don't bother with the code below. - if (SI->getNumOperands() == 2) { + if (SI->getNumCases() == 1) { BranchInst::Create(SI->getDefaultDest(), CurBlock); CurBlock->getInstList().erase(SI); return; diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e5a00f4..db3e942 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -86,11 +86,15 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { UI != UE; ++UI) { // Loop over all of the uses of the alloca const User *U = *UI; if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (LI->isVolatile()) return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (SI->isVolatile()) return false; } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index b47a7cc..fa8061c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -16,6 +16,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" @@ -43,7 +44,7 @@ SSAUpdater::~SSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates with type 'Ty'. PHI nodes get a name based on 'Name'. -void SSAUpdater::Initialize(const Type *Ty, StringRef Name) { +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); else @@ -378,8 +379,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element. - DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; @@ -395,7 +395,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; BasicBlock *BB = User->getParent(); - std::vector<Instruction*> &BlockUses = UsesByBlock[BB]; + TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. 
if (BlockUses.empty()) continue; diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9d9c324..b8c3ab4 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -63,6 +63,7 @@ class SimplifyCFGOpt { bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); bool SimplifyUnreachable(UnreachableInst *UI); @@ -322,7 +323,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - const IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -2138,6 +2139,52 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, return true; } +bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + // If this is a trivial landing pad that just continues unwinding the caught + // exception then zap the landing pad, turning its invokes into calls. + BasicBlock *BB = RI->getParent(); + LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + if (RI->getValue() != LPInst) + // Not a landing pad, or the resume is not unwinding the exception that + // caused control to branch here. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = LPInst, E = RI; + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + Call->setDebugLoc(II->getDebugLoc()); + + // Anything that used the value produced by the invoke instruction now uses + // the value produced by the call instruction. Note that we do this even + // for void functions and calls with no uses so that the callgraph edge is + // updated. + II->replaceAllUsesWith(Call); + BB->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + II->eraseFromParent(); + } + + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; @@ -2244,18 +2291,34 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { while (UI != BB->begin()) { BasicBlock::iterator BBI = UI; --BBI; - // Do not delete instructions that can have side effects, like calls - // (which may never return) and volatile loads and stores. 
+ // Do not delete instructions that can have side effects which might cause + // the unreachable to not be reachable; specifically, calls and volatile + // operations may have this effect. if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; - - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (SI->isVolatile()) - break; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) - if (LI->isVolatile()) + + if (BBI->mayHaveSideEffects()) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->isVolatile()) + break; + } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->isVolatile()) + break; + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + if (CXI->isVolatile()) + break; + } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && + !isa<LandingPadInst>(BBI)) { break; - + } + // Note that deleting LandingPad's here is in fact okay, although it + // involves a bit of subtle reasoning. If this inst is a LandingPad, + // all the predecessors of this block will be the unwind edges of Invokes, + // and we can therefore guarantee this block will be erased. + } + // Delete this instruction (any uses are guaranteed to be dead) if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); @@ -2707,6 +2770,71 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return false; } +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + return false; + + if (C->isNullValue()) { + Instruction *Use = I->use_back(); + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + return LI->getPointerAddressSpace() == 0; + + // Store to null is undefined. + if (StoreInst *SI = dyn_cast<StoreInst>(Use)) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (eg. null pointer derefence), remove the branch leading here. +static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { + for (BasicBlock::iterator i = BB->begin(); + PHINode *PHI = dyn_cast<PHINode>(i); ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { + TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + IRBuilder<> Builder(T); + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + BB->removePredecessor(PHI->getIncomingBlock(i)); + // Turn uncoditional branches into unreachables and remove the dead + // destination from conditional branches. 
+ if (BI->isUnconditional()) + Builder.CreateUnreachable(); + else + Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : + BI->getSuccessor(0)); + BI->eraseFromParent(); + return true; + } + // TODO: SwitchInst. + } + + return false; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { bool Changed = false; @@ -2730,6 +2858,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); + // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -2752,6 +2883,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } else { if (SimplifyCondBranch(BI, Builder)) return true; } + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 0000000..76289c0 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,432 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" + +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// SimplifyIndvar - This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to cleanup after + /// other loop passes that preserve SCEV. 
+ class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + IVUsers *IU; // NULL for DisableIVRewrite + const TargetData *TD; // May be NULL + + SmallVectorImpl<WeakVH> &DeadInsts; + + bool Changed; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + L(Loop), + LI(LPM->getAnalysisIfAvailable<LoopInfo>()), + SE(SE), + IU(IVU), + TD(LPM->getAnalysisIfAvailable<TargetData>()), + DeadInsts(Dead), + Changed(false) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplicitions to users of an IV. + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + }; +} + +/// foldIVUser - Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by a instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = 0; + unsigned OperIdx = 0; + const SCEV *FoldedExpr = 0; + switch (UseInst->getOpcode()) { + default: + return 0; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator. + if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return 0; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (IVOperand->getNumOperands() != 2 || + !isa<ConstantInt>(IVOperand->getOperand(1))) + return 0; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. + uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return 0; + + D = ConstantInt::get(UseInst->getContext(), + APInt(BitWidth, 1).shl(D->getZExtValue())); + } + FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); + } + // We have something that might fold it's operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return 0; + + // Bypass the operand if SCEV can prove it has no effect. 
+ if (SE->getSCEV(UseInst) != FoldedExpr) + return 0; + + DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.push_back(IVOperand); + return IVSrc; +} + +/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable. +void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + return; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; + Changed = true; + DeadInsts.push_back(ICmp); +} + +/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless +/// remainder operations operating on an induction variable. +void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users. 
+ if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); +} + +/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// simplifyUsers - Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification. +/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encouters a clear optimization +/// opportunitiy. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV. 
+ SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work. + if (UseOper.first == CurrIV) continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + + Value *NewOper = foldIVUser(UseOper.first, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseOper.first, IVOperand)) { + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + continue; + } + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseOper.first, L, SE)) { + pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +/// simplifyUsersOfIV - Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) +{ + LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// simplifyLoopIVs - Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead) { + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); + } + return Changed; +} + +/// simplifyIVUsers - Perform simplification on instructions recorded by the +/// IVUsers pass. +/// +/// This is the old approach to IV simplification to be replaced by +/// SimplifyLoopIVs. +bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead) { + SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead); + + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered. 
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + SIV.eliminateIVComparison(ICmp, IVOperand); + continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); + continue; + } + } + } + return SIV.hasChanged(); +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 973b105..fc2538d 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -183,10 +183,9 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, } } - // Remap attached metadata. Don't bother remapping DebugLoc, it can never - // have mappings to do. + // Remap attached metadata. SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - I->getAllMetadataOtherThanDebugLoc(MDs); + I->getAllMetadata(MDs); for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { MDNode *Old = MI->second; diff --git a/contrib/llvm/lib/VMCore/AsmWriter.cpp b/contrib/llvm/lib/VMCore/AsmWriter.cpp index 94794c3..18308f2 100644 --- a/contrib/llvm/lib/VMCore/AsmWriter.cpp +++ b/contrib/llvm/lib/VMCore/AsmWriter.cpp @@ -58,7 +58,7 @@ static const Module *getModuleFromVal(const Value *V) { const Function *M = I->getParent() ? I->getParent()->getParent() : 0; return M ? M->getParent() : 0; } - + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV->getParent(); return 0; @@ -142,18 +142,18 @@ public: /// NamedTypes - The named types that are used by the current module. std::vector<StructType*> NamedTypes; - + /// NumberedTypes - The numbered types, along with their value. DenseMap<StructType*, unsigned> NumberedTypes; - + TypePrinting() {} ~TypePrinting() {} - + void incorporateTypes(const Module &M); - + void print(Type *Ty, raw_ostream &OS); - + void printStructBody(StructType *Ty, raw_ostream &OS); }; } // end anonymous namespace. @@ -161,25 +161,25 @@ public: void TypePrinting::incorporateTypes(const Module &M) { M.findUsedStructTypes(NamedTypes); - + // The list of struct types we got back includes all the struct types, split // the unnamed ones out to a numbering and remove the anonymous structs. unsigned NextNumber = 0; - + std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E; for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) { StructType *STy = *I; - + // Ignore anonymous types. 
- if (STy->isAnonymous()) + if (STy->isLiteral()) continue; - + if (STy->getName().empty()) NumberedTypes[STy] = NextNumber++; else *NextToUse++ = STy; } - + NamedTypes.erase(NextToUse, NamedTypes.end()); } @@ -220,13 +220,13 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { } case Type::StructTyID: { StructType *STy = cast<StructType>(Ty); - - if (STy->isAnonymous()) + + if (STy->isLiteral()) return printStructBody(STy, OS); if (!STy->getName().empty()) return PrintLLVMName(OS, STy->getName(), LocalPrefix); - + DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy); if (I != NumberedTypes.end()) OS << '%' << I->second; @@ -267,10 +267,10 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { OS << "opaque"; return; } - + if (STy->isPacked()) OS << '<'; - + if (STy->getNumElements() == 0) { OS << "{}"; } else { @@ -281,7 +281,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { OS << ", "; print(*I, OS); } - + OS << " }"; } if (STy->isPacked()) @@ -386,7 +386,8 @@ static SlotTracker *createSlotTracker(const Value *V) { return new SlotTracker(FA->getParent()); if (const Instruction *I = dyn_cast<Instruction>(V)) - return new SlotTracker(I->getParent()->getParent()); + if (I->getParent()) + return new SlotTracker(I->getParent()->getParent()); if (const BasicBlock *BB = dyn_cast<BasicBlock>(V)) return new SlotTracker(BB->getParent()); @@ -419,7 +420,7 @@ static SlotTracker *createSlotTracker(const Value *V) { // Module level constructor. Causes the contents of the Module (sans functions) // to be added to the slot table. SlotTracker::SlotTracker(const Module *M) - : TheModule(M), TheFunction(0), FunctionProcessed(false), + : TheModule(M), TheFunction(0), FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0) { } @@ -490,12 +491,12 @@ void SlotTracker::processFunction() { E = TheFunction->end(); BB != E; ++BB) { if (!BB->hasName()) CreateFunctionSlot(BB); - + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (!I->getType()->isVoidTy() && !I->hasName()) CreateFunctionSlot(I); - + // Intrinsics can directly use metadata. We allow direct calls to any // llvm.foo function here, because the target may not be linked into the // optimizer. 
@@ -658,6 +659,23 @@ static const char *getPredicateText(unsigned predicate) { return pred; } +static void writeAtomicRMWOperation(raw_ostream &Out, + AtomicRMWInst::BinOp Op) { + switch (Op) { + default: Out << " <unknown operation " << Op << ">"; break; + case AtomicRMWInst::Xchg: Out << " xchg"; break; + case AtomicRMWInst::Add: Out << " add"; break; + case AtomicRMWInst::Sub: Out << " sub"; break; + case AtomicRMWInst::And: Out << " and"; break; + case AtomicRMWInst::Nand: Out << " nand"; break; + case AtomicRMWInst::Or: Out << " or"; break; + case AtomicRMWInst::Xor: Out << " xor"; break; + case AtomicRMWInst::Max: Out << " max"; break; + case AtomicRMWInst::Min: Out << " min"; break; + case AtomicRMWInst::UMax: Out << " umax"; break; + case AtomicRMWInst::UMin: Out << " umin"; break; + } +} static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { if (const OverflowingBinaryOperator *OBO = @@ -792,7 +810,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, Out << "zeroinitializer"; return; } - + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) { Out << "blockaddress("; WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine, @@ -939,13 +957,13 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node, else { TypePrinter->print(V->getType(), Out); Out << ' '; - WriteAsOperandInternal(Out, Node->getOperand(mi), + WriteAsOperandInternal(Out, Node->getOperand(mi), TypePrinter, Machine, Context); } if (mi + 1 != me) Out << ", "; } - + Out << "}"; } @@ -990,7 +1008,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context); return; } - + if (!Machine) { if (N->isFunctionLocal()) Machine = new SlotTracker(N->getFunction()); @@ -1020,26 +1038,35 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, char Prefix = '%'; int Slot; + // If we have a SlotTracker, use it. if (Machine) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { Slot = Machine->getGlobalSlot(GV); Prefix = '@'; } else { Slot = Machine->getLocalSlot(V); + + // If the local value didn't succeed, then we may be referring to a value + // from a different function. Translate it, as this can happen when using + // address of blocks. + if (Slot == -1) + if ((Machine = createSlotTracker(V))) { + Slot = Machine->getLocalSlot(V); + delete Machine; + } } - } else { - Machine = createSlotTracker(V); - if (Machine) { - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - Slot = Machine->getGlobalSlot(GV); - Prefix = '@'; - } else { - Slot = Machine->getLocalSlot(V); - } - delete Machine; + } else if ((Machine = createSlotTracker(V))) { + // Otherwise, create one to get the # and then destroy it. 
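writeAtomicRMWOperation above enumerates the operation keywords the new atomicrmw instruction can carry. As a hedged illustration of where such an instruction comes from, the sketch below uses the same CreateAtomicRMW builder call that the auto-upgrade code later in this change uses for the old llvm.atomic.* read-modify-write intrinsics; the helper name and operand are placeholders.

// Sketch only, not part of the patch.
static Value *emitAtomicIncrement(IRBuilder<> &Builder, Value *Ptr) {
  // Printed by this writer roughly as:  atomicrmw add i32* <ptr>, i32 1 monotonic
  return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr,
                                 Builder.getInt32(1), Monotonic);
}
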
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + Slot = Machine->getGlobalSlot(GV); + Prefix = '@'; } else { - Slot = -1; + Slot = Machine->getLocalSlot(V); } + delete Machine; + Machine = 0; + } else { + Slot = -1; } if (Slot != -1) @@ -1081,7 +1108,7 @@ class AssemblyWriter { const Module *TheModule; TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; - + public: inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M, @@ -1093,11 +1120,12 @@ public: void printMDNodeBody(const MDNode *MD); void printNamedMDNode(const NamedMDNode *NMD); - + void printModule(const Module *M); void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, Attributes Attrs); + void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); void writeAllMDNodes(); @@ -1128,6 +1156,28 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); } +void AssemblyWriter::writeAtomic(AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + if (Ordering == NotAtomic) + return; + + switch (SynchScope) { + default: Out << " <bad scope " << int(SynchScope) << ">"; break; + case SingleThread: Out << " singlethread"; break; + case CrossThread: break; + } + + switch (Ordering) { + default: Out << " <bad ordering " << int(Ordering) << ">"; break; + case Unordered: Out << " unordered"; break; + case Monotonic: Out << " monotonic"; break; + case Acquire: Out << " acquire"; break; + case Release: Out << " release"; break; + case AcquireRelease: Out << " acq_rel"; break; + case SequentiallyConsistent: Out << " seq_cst"; break; + } +} + void AssemblyWriter::writeParamOperand(const Value *Operand, Attributes Attrs) { if (Operand == 0) { @@ -1216,7 +1266,7 @@ void AssemblyWriter::printModule(const Module *M) { // Output named metadata. if (!M->named_metadata_empty()) Out << '\n'; - + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), E = M->named_metadata_end(); I != E; ++I) printNamedMDNode(I); @@ -1357,26 +1407,8 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { if (Aliasee == 0) { TypePrinter.print(GA->getType(), Out); Out << " <<NULL ALIASEE>>"; - } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) { - TypePrinter.print(GV->getType(), Out); - Out << ' '; - PrintLLVMName(Out, GV); - } else if (const Function *F = dyn_cast<Function>(Aliasee)) { - TypePrinter.print(F->getFunctionType(), Out); - Out << "* "; - - WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent()); - } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) { - TypePrinter.print(GA->getType(), Out); - Out << ' '; - PrintLLVMName(Out, GA); } else { - const ConstantExpr *CE = cast<ConstantExpr>(Aliasee); - // The only valid GEP is an all zero GEP. - assert((CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr) && - "Unsupported aliasee"); - writeOperand(CE, false); + writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee)); } printInfoComment(*GA); @@ -1387,29 +1419,29 @@ void AssemblyWriter::printTypeIdentities() { if (TypePrinter.NumberedTypes.empty() && TypePrinter.NamedTypes.empty()) return; - + Out << '\n'; - + // We know all the numbers that each type is used and we know that it is a // dense assignment. Convert the map to an index table. 
std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size()); - for (DenseMap<StructType*, unsigned>::iterator I = + for (DenseMap<StructType*, unsigned>::iterator I = TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end(); I != E; ++I) { assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?"); NumberedTypes[I->second] = I->first; } - + // Emit all numbered types. for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) { Out << '%' << i << " = type "; - + // Make sure we print out at least one level of the type structure, so // that we do not get %2 = type %2 TypePrinter.printStructBody(NumberedTypes[i], Out); Out << '\n'; } - + for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) { PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix); Out << " = type "; @@ -1457,7 +1489,7 @@ void AssemblyWriter::printFunction(const Function *F) { default: Out << "cc" << F->getCallingConv() << " "; break; } - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); const AttrListPtr &Attrs = F->getAttributes(); Attributes RetAttrs = Attrs.getRetAttributes(); if (RetAttrs != Attribute::None) @@ -1628,18 +1660,24 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << '%' << SlotNum << " = "; } - // If this is a volatile load or store, print out the volatile marker. - if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) || - (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())) { - Out << "volatile "; - } else if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) { - // If this is a call, check if it's a tail call. + if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) Out << "tail "; - } // Print out the opcode... Out << I.getOpcodeName(); + // If this is an atomic load or store, print out the atomic marker. + if ((isa<LoadInst>(I) && cast<LoadInst>(I).isAtomic()) || + (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic())) + Out << " atomic"; + + // If this is a volatile operation, print out the volatile marker. + if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) || + (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) || + (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isVolatile()) || + (isa<AtomicRMWInst>(I) && cast<AtomicRMWInst>(I).isVolatile())) + Out << " volatile"; + // Print out optimization information. WriteOptimizationInfo(Out, &I); @@ -1647,6 +1685,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { if (const CmpInst *CI = dyn_cast<CmpInst>(&I)) Out << ' ' << getPredicateText(CI->getPredicate()); + // Print out the atomicrmw operation + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) + writeAtomicRMWOperation(Out, RMWI->getOperation()); + // Print out the type of the operands... const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0; @@ -1661,18 +1703,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(BI.getSuccessor(1), true); } else if (isa<SwitchInst>(I)) { + SwitchInst& SI(cast<SwitchInst>(I)); // Special case switch instruction to get formatting nice and correct. Out << ' '; - writeOperand(Operand , true); + writeOperand(SI.getCondition(), true); Out << ", "; - writeOperand(I.getOperand(1), true); + writeOperand(SI.getDefaultDest(), true); Out << " ["; - - for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) { + // Skip the first item since that's the default case. 
+ unsigned NumCases = SI.getNumCases(); + for (unsigned i = 1; i < NumCases; ++i) { Out << "\n "; - writeOperand(I.getOperand(op ), true); + writeOperand(SI.getCaseValue(i), true); Out << ", "; - writeOperand(I.getOperand(op+1), true); + writeOperand(SI.getSuccessor(i), true); } Out << "\n ]"; } else if (isa<IndirectBrInst>(I)) { @@ -1680,7 +1724,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ' '; writeOperand(Operand, true); Out << ", ["; - + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { if (i != 1) Out << ", "; @@ -1709,6 +1753,24 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(I.getOperand(1), true); for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i) Out << ", " << *i; + } else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) { + Out << ' '; + TypePrinter.print(I.getType(), Out); + Out << " personality "; + writeOperand(I.getOperand(0), true); Out << '\n'; + + if (LPI->isCleanup()) + Out << " cleanup"; + + for (unsigned i = 0, e = LPI->getNumClauses(); i != e; ++i) { + if (i != 0 || LPI->isCleanup()) Out << "\n"; + if (LPI->isCatch(i)) + Out << " catch "; + else + Out << " filter "; + + writeOperand(LPI->getClause(i), true); + } } else if (isa<ReturnInst>(I) && !Operand) { Out << " void"; } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) { @@ -1878,11 +1940,23 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } } - // Print post operand alignment for load/store. - if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) { - Out << ", align " << cast<LoadInst>(I).getAlignment(); - } else if (isa<StoreInst>(I) && cast<StoreInst>(I).getAlignment()) { - Out << ", align " << cast<StoreInst>(I).getAlignment(); + // Print atomic ordering/alignment for memory operations + if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + if (LI->isAtomic()) + writeAtomic(LI->getOrdering(), LI->getSynchScope()); + if (LI->getAlignment()) + Out << ", align " << LI->getAlignment(); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { + if (SI->isAtomic()) + writeAtomic(SI->getOrdering(), SI->getSynchScope()); + if (SI->getAlignment()) + Out << ", align " << SI->getAlignment(); + } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) { + writeAtomic(CXI->getOrdering(), CXI->getSynchScope()); + } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) { + writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope()); + } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) { + writeAtomic(FI->getOrdering(), FI->getSynchScope()); } // Print Metadata info. @@ -1916,7 +1990,7 @@ static void WriteMDNodeComment(const MDNode *Node, APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask); if (Val.ult(LLVMDebugVersion)) return; - + Out.PadToColumn(50); if (Tag == dwarf::DW_TAG_user_base) Out << "; [ DW_TAG_user_base ]"; @@ -1932,7 +2006,7 @@ void AssemblyWriter::writeAllMDNodes() { for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end(); I != E; ++I) Nodes[I->second] = cast<MDNode>(I->first); - + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { Out << '!' << i << " = metadata "; printMDNodeBody(Nodes[i]); @@ -1970,10 +2044,10 @@ void Type::print(raw_ostream &OS) const { } TypePrinting TP; TP.print(const_cast<Type*>(this), OS); - + // If the type is a named struct type, print the body as well. 
if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this))) - if (!STy->isAnonymous()) { + if (!STy->isLiteral()) { OS << " = type "; TP.printStructBody(STy, OS); } diff --git a/contrib/llvm/lib/VMCore/Attributes.cpp b/contrib/llvm/lib/VMCore/Attributes.cpp index bf6efa1..485be75 100644 --- a/contrib/llvm/lib/VMCore/Attributes.cpp +++ b/contrib/llvm/lib/VMCore/Attributes.cpp @@ -38,6 +38,8 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "nounwind "; if (Attrs & Attribute::UWTable) Result += "uwtable "; + if (Attrs & Attribute::ReturnsTwice) + Result += "returns_twice "; if (Attrs & Attribute::InReg) Result += "inreg "; if (Attrs & Attribute::NoAlias) @@ -72,8 +74,6 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "noimplicitfloat "; if (Attrs & Attribute::Naked) Result += "naked "; - if (Attrs & Attribute::Hotpatch) - Result += "hotpatch "; if (Attrs & Attribute::NonLazyBind) Result += "nonlazybind "; if (Attrs & Attribute::StackAlignment) { @@ -92,7 +92,7 @@ std::string Attribute::getAsString(Attributes Attrs) { return Result; } -Attributes Attribute::typeIncompatible(const Type *Ty) { +Attributes Attribute::typeIncompatible(Type *Ty) { Attributes Incompatible = None; if (!Ty->isIntegerTy()) diff --git a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp index 9e93ff3..b849d3e 100644 --- a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp +++ b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp @@ -14,11 +14,15 @@ #include "llvm/AutoUpgrade.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/Instruction.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/IntrinsicInst.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" #include <cstring> @@ -34,11 +38,48 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return false; Name = Name.substr(5); // Strip off "llvm." - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); Module *M = F->getParent(); switch (Name[0]) { default: break; + case 'a': + if (Name.startswith("atomic.cmp.swap") || + Name.startswith("atomic.swap") || + Name.startswith("atomic.load.add") || + Name.startswith("atomic.load.sub") || + Name.startswith("atomic.load.and") || + Name.startswith("atomic.load.nand") || + Name.startswith("atomic.load.or") || + Name.startswith("atomic.load.xor") || + Name.startswith("atomic.load.max") || + Name.startswith("atomic.load.min") || + Name.startswith("atomic.load.umax") || + Name.startswith("atomic.load.umin")) + return true; + case 'i': + // This upgrades the old llvm.init.trampoline to the new + // llvm.init.trampoline and llvm.adjust.trampoline pair. + if (Name == "init.trampoline") { + // The new llvm.init.trampoline returns nothing. + if (FTy->getReturnType()->isVoidTy()) + break; + + assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!"); + + // Change the name of the old intrinsic so that we can play with its type. 
+ std::string NameTmp = F->getName(); + F->setName(""); + NewFn = cast<Function>(M->getOrInsertFunction( + NameTmp, + Type::getVoidTy(M->getContext()), + FTy->getParamType(0), FTy->getParamType(1), + FTy->getParamType(2), (Type *)0)); + return true; + } + case 'm': + if (Name == "memory.barrier") + return true; case 'p': // This upgrades the llvm.prefetch intrinsic to accept one more parameter, // which is a instruction / data cache identifier. The old version only @@ -139,8 +180,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { F->getName() == "llvm.x86.sse2.loadu.dq" || F->getName() == "llvm.x86.sse2.loadu.pd") { // Convert to a native, unaligned load. - const Type *VecTy = CI->getType(); - const Type *IntTy = IntegerType::get(C, 128); + Type *VecTy = CI->getType(); + Type *IntTy = IntegerType::get(C, 128); IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); @@ -182,6 +223,80 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove intrinsic. CI->eraseFromParent(); + } else if (F->getName().startswith("llvm.atomic.cmp.swap")) { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0), + CI->getArgOperand(1), + CI->getArgOperand(2), + Monotonic); + + // Replace intrinsic. + Val->takeName(CI); + if (!CI->use_empty()) + CI->replaceAllUsesWith(Val); + CI->eraseFromParent(); + } else if (F->getName().startswith("llvm.atomic")) { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + AtomicRMWInst::BinOp Op; + if (F->getName().startswith("llvm.atomic.swap")) + Op = AtomicRMWInst::Xchg; + else if (F->getName().startswith("llvm.atomic.load.add")) + Op = AtomicRMWInst::Add; + else if (F->getName().startswith("llvm.atomic.load.sub")) + Op = AtomicRMWInst::Sub; + else if (F->getName().startswith("llvm.atomic.load.and")) + Op = AtomicRMWInst::And; + else if (F->getName().startswith("llvm.atomic.load.nand")) + Op = AtomicRMWInst::Nand; + else if (F->getName().startswith("llvm.atomic.load.or")) + Op = AtomicRMWInst::Or; + else if (F->getName().startswith("llvm.atomic.load.xor")) + Op = AtomicRMWInst::Xor; + else if (F->getName().startswith("llvm.atomic.load.max")) + Op = AtomicRMWInst::Max; + else if (F->getName().startswith("llvm.atomic.load.min")) + Op = AtomicRMWInst::Min; + else if (F->getName().startswith("llvm.atomic.load.umax")) + Op = AtomicRMWInst::UMax; + else if (F->getName().startswith("llvm.atomic.load.umin")) + Op = AtomicRMWInst::UMin; + else + llvm_unreachable("Unknown atomic"); + + Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0), + CI->getArgOperand(1), + Monotonic); + + // Replace intrinsic. + Val->takeName(CI); + if (!CI->use_empty()) + CI->replaceAllUsesWith(Val); + CI->eraseFromParent(); + } else if (F->getName() == "llvm.memory.barrier") { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + // Note that this conversion ignores the "device" bit; it was not really + // well-defined, and got abused because nobody paid enough attention to + // get it right. In practice, this probably doesn't matter; application + // code generally doesn't need anything stronger than + // SequentiallyConsistent (and realistically, SequentiallyConsistent + // is lowered to a strong enough barrier for almost anything). 
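Concretely, the replacement below inspects only three of the old barrier's boolean operands: a set operand 1 becomes a sequentially consistent fence; otherwise a cleared operand 0 becomes a release fence, a cleared operand 3 an acquire fence, and anything else acq_rel. As a small hedged sketch, the common "all bits set" barrier therefore collapses to the strongest form:

// Sketch only: what a full barrier ends up as; CreateFence is the same
// builder call used in the conversion code below.
static void emitFullBarrier(IRBuilder<> &Builder) {
  Builder.CreateFence(SequentiallyConsistent);   // textual IR:  fence seq_cst
}
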
+ + if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue()) + Builder.CreateFence(SequentiallyConsistent); + else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue()) + Builder.CreateFence(Release); + else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue()) + Builder.CreateFence(Acquire); + else + Builder.CreateFence(AcquireRelease); + + // Remove intrinsic. + CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } @@ -192,7 +307,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::prefetch: { IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); - const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); + llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); // Add the extra "data cache" argument Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1), @@ -212,6 +327,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); break; } + case Intrinsic::init_trampoline: { + + // Transform + // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z) + // to + // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z) + // %tramp = call i8* llvm.adjust.trampoline (i8* %x) + + Function *AdjustTrampolineFn = + cast<Function>(Intrinsic::getDeclaration(F->getParent(), + Intrinsic::adjust_trampoline)); + + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI); + + Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2)); + + CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn, + CI->getArgOperand(0), + CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(AdjustCall); + CI->eraseFromParent(); + break; + } } } @@ -279,3 +420,249 @@ void llvm::CheckDebugInfoIntrinsics(Module *M) { } } } + +/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic +/// calls reachable from the unwind basic block. +static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn, + CallInst *&Sel, + SmallPtrSet<BasicBlock*, 8> &Visited) { + if (!Visited.insert(BB)) return; + + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + switch (CI->getCalledFunction()->getIntrinsicID()) { + default: break; + case Intrinsic::eh_exception: + assert(!Exn && "Found more than one eh.exception call!"); + Exn = CI; + break; + case Intrinsic::eh_selector: + assert(!Sel && "Found more than one eh.selector call!"); + Sel = CI; + break; + } + + if (Exn && Sel) return; + } + } + + if (Exn && Sel) return; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + FindExnAndSelIntrinsics(*I, Exn, Sel, Visited); + if (Exn && Sel) return; + } +} + +/// TransferClausesToLandingPadInst - Transfer the exception handling clauses +/// from the eh_selector call to the new landingpad instruction. 
+static void TransferClausesToLandingPadInst(LandingPadInst *LPI, + CallInst *EHSel) { + LLVMContext &Context = LPI->getContext(); + unsigned N = EHSel->getNumArgOperands(); + + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){ + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert(FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) + for (unsigned j = FirstCatch; j < N; ++j) { + Value *Val = EHSel->getArgOperand(j); + if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") { + LPI->addClause(EHSel->getArgOperand(j)); + } else { + GlobalVariable *GV = cast<GlobalVariable>(Val); + LPI->addClause(GV->getInitializer()); + } + } + + if (!FilterLength) { + // Cleanup. + LPI->setCleanup(true); + } else { + // Filter. + SmallVector<Constant *, 4> TyInfo; + TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j))); + ArrayType *AType = + ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() : + PointerType::getUnqual(Type::getInt8Ty(Context)), + TyInfo.size()); + LPI->addClause(ConstantArray::get(AType, TyInfo)); + } + + N = i; + } + } + + if (N > 2) + for (unsigned j = 2; j < N; ++j) { + Value *Val = EHSel->getArgOperand(j); + if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") { + LPI->addClause(EHSel->getArgOperand(j)); + } else { + GlobalVariable *GV = cast<GlobalVariable>(Val); + LPI->addClause(GV->getInitializer()); + } + } +} + +/// This function upgrades the old pre-3.0 exception handling system to the new +/// one. N.B. This will be removed in 3.1. +void llvm::UpgradeExceptionHandling(Module *M) { + Function *EHException = M->getFunction("llvm.eh.exception"); + Function *EHSelector = M->getFunction("llvm.eh.selector"); + if (!EHException || !EHSelector) + return; + + LLVMContext &Context = M->getContext(); + Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context)); + Type *SelTy = Type::getInt32Ty(Context); + Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL); + + // This map links the invoke instruction with the eh.exception and eh.selector + // calls associated with it. + DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap; + for (Module::iterator + I = M->begin(), E = M->end(); I != E; ++I) { + Function &F = *I; + + for (Function::iterator + II = F.begin(), IE = F.end(); II != IE; ++II) { + BasicBlock *BB = &*II; + InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator()); + if (!Inst) continue; + BasicBlock *UnwindDest = Inst->getUnwindDest(); + if (UnwindDest->isLandingPad()) continue; // Already converted. + + SmallPtrSet<BasicBlock*, 8> Visited; + CallInst *Exn = 0; + CallInst *Sel = 0; + FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited); + assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!"); + InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel); + } + } + + // This map stores the slots where the exception object and selector value are + // stored within a function. 
+ DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap; + SmallPtrSet<Instruction*, 32> DeadInsts; + for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator + I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end(); + I != E; ++I) { + InvokeInst *Invoke = I->first; + BasicBlock *UnwindDest = Invoke->getUnwindDest(); + Function *F = UnwindDest->getParent(); + std::pair<Value*, Value*> EHIntrinsics = I->second; + CallInst *Exn = cast<CallInst>(EHIntrinsics.first); + CallInst *Sel = cast<CallInst>(EHIntrinsics.second); + + // Store the exception object and selector value in the entry block. + Value *ExnSlot = 0; + Value *SelSlot = 0; + if (!FnToLPadSlotMap[F].first) { + BasicBlock *Entry = &F->front(); + ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator()); + SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator()); + FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot); + } else { + ExnSlot = FnToLPadSlotMap[F].first; + SelSlot = FnToLPadSlotMap[F].second; + } + + if (!UnwindDest->getSinglePredecessor()) { + // The unwind destination doesn't have a single predecessor. Create an + // unwind destination which has only one predecessor. + BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad", + UnwindDest->getParent()); + BranchInst::Create(UnwindDest, NewBB); + Invoke->setUnwindDest(NewBB); + + // Fix up any PHIs in the original unwind destination block. + for (BasicBlock::iterator + II = UnwindDest->begin(); isa<PHINode>(II); ++II) { + PHINode *PN = cast<PHINode>(II); + int Idx = PN->getBasicBlockIndex(Invoke->getParent()); + if (Idx == -1) continue; + PN->setIncomingBlock(Idx, NewBB); + } + + UnwindDest = NewBB; + } + + IRBuilder<> Builder(Context); + Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt()); + + Value *PersFn = Sel->getArgOperand(1); + LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0); + Value *LPExn = Builder.CreateExtractValue(LPI, 0); + Value *LPSel = Builder.CreateExtractValue(LPI, 1); + Builder.CreateStore(LPExn, ExnSlot); + Builder.CreateStore(LPSel, SelSlot); + + TransferClausesToLandingPadInst(LPI, Sel); + + DeadInsts.insert(Exn); + DeadInsts.insert(Sel); + } + + // Replace the old intrinsic calls with the values from the landingpad + // instruction(s). These values were stored in allocas for us to use here. + for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator + I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end(); + I != E; ++I) { + std::pair<Value*, Value*> EHIntrinsics = I->second; + CallInst *Exn = cast<CallInst>(EHIntrinsics.first); + CallInst *Sel = cast<CallInst>(EHIntrinsics.second); + BasicBlock *Parent = Exn->getParent(); + + std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()]; + + IRBuilder<> Builder(Context); + Builder.SetInsertPoint(Parent, Exn); + LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load"); + LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load"); + + Exn->replaceAllUsesWith(LPExn); + Sel->replaceAllUsesWith(LPSel); + } + + // Remove the dead instructions. + for (SmallPtrSet<Instruction*, 32>::iterator + I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) { + Instruction *Inst = *I; + Inst->eraseFromParent(); + } + + // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the + // exception and selector values from the stored place. 
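The same building blocks serve newly generated code, not just this auto-upgrade path. A hedged sketch of emitting a landing pad directly follows; the personality function and typeinfo global are assumed to exist and are placeholders, and the usual llvm/Instructions.h and llvm/Support/IRBuilder.h includes are assumed.

// Sketch only, not part of the patch: a landing pad that catches one type and
// also acts as a cleanup, using the same { i8*, i32 } aggregate as above.
static LandingPadInst *emitCatchCleanupPad(IRBuilder<> &Builder, Function *Pers,
                                           GlobalVariable *TypeInfo) {
  LLVMContext &Ctx = Builder.getContext();
  Type *ExnTy = Type::getInt8PtrTy(Ctx);
  Type *SelTy = Type::getInt32Ty(Ctx);

  LandingPadInst *Pad =
      Builder.CreateLandingPad(StructType::get(ExnTy, SelTy, NULL), Pers,
                               /*NumClauses=*/1, "lpad");
  Pad->setCleanup(true);
  Pad->addClause(ConstantExpr::getBitCast(TypeInfo, ExnTy));
  return Pad;
}
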
+ Function *EHResume = M->getFunction("llvm.eh.resume"); + if (!EHResume) return; + + while (!EHResume->use_empty()) { + CallInst *Resume = cast<CallInst>(EHResume->use_back()); + BasicBlock *BB = Resume->getParent(); + + IRBuilder<> Builder(Context); + Builder.SetInsertPoint(BB, Resume); + + Value *LPadVal = + Builder.CreateInsertValue(UndefValue::get(LPadSlotTy), + Resume->getArgOperand(0), 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1), + 1, "lpad.val"); + Builder.CreateResume(LPadVal); + + // Remove all instructions after the 'resume.' + BasicBlock::iterator I = Resume; + while (I != BB->end()) { + Instruction *Inst = &*I++; + Inst->eraseFromParent(); + } + } +} diff --git a/contrib/llvm/lib/VMCore/BasicBlock.cpp b/contrib/llvm/lib/VMCore/BasicBlock.cpp index 70265c8..d0aa275 100644 --- a/contrib/llvm/lib/VMCore/BasicBlock.cpp +++ b/contrib/llvm/lib/VMCore/BasicBlock.cpp @@ -53,7 +53,7 @@ BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent, } else if (NewParent) { NewParent->getBasicBlockList().push_back(this); } - + setName(Name); } @@ -76,7 +76,7 @@ BasicBlock::~BasicBlock() { BA->destroyConstant(); } } - + assert(getParent() == 0 && "BasicBlock still linked into the program!"); dropAllReferences(); InstList.clear(); @@ -167,6 +167,12 @@ Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { return &*i; } +BasicBlock::iterator BasicBlock::getFirstInsertionPt() { + iterator InsertPt = getFirstNonPHI(); + if (isa<LandingPadInst>(InsertPt)) ++InsertPt; + return InsertPt; +} + void BasicBlock::dropAllReferences() { for(iterator I = begin(), E = end(); I != E; ++I) I->dropAllReferences(); @@ -184,8 +190,8 @@ BasicBlock *BasicBlock::getSinglePredecessor() { /// getUniquePredecessor - If this basic block has a unique predecessor block, /// return the block, otherwise return a null pointer. -/// Note that unique predecessor doesn't mean single edge, there can be -/// multiple edges from the unique predecessor to this block (for example +/// Note that unique predecessor doesn't mean single edge, there can be +/// multiple edges from the unique predecessor to this block (for example /// a switch statement with multiple cases having the same destination). BasicBlock *BasicBlock::getUniquePredecessor() { pred_iterator PI = pred_begin(this), E = pred_end(this); @@ -336,11 +342,27 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { return; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { BasicBlock *Succ = TI->getSuccessor(i); - for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II); - ++II) { + // N.B. Succ might not be a complete BasicBlock, so don't assume + // that it ends with a non-phi instruction. + for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) { + PHINode *PN = dyn_cast<PHINode>(II); + if (!PN) + break; int i; while ((i = PN->getBasicBlockIndex(this)) >= 0) PN->setIncomingBlock(i, New); } } } + +/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's +/// the destination of the 'unwind' edge of an invoke instruction. +bool BasicBlock::isLandingPad() const { + return isa<LandingPadInst>(getFirstNonPHI()); +} + +/// getLandingPadInst() - Return the landingpad instruction associated with +/// the landing pad. 
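Passes that insert code at the top of a block are the intended users of the new getFirstInsertionPt(): unlike getFirstNonPHI(), it also steps past a leading landingpad, which has to stay the first non-PHI instruction of its block. A small hedged sketch (the helper and the i32 counter global are placeholders):

// Sketch only, not part of the patch.
static void emitCounterBump(BasicBlock *BB, GlobalVariable *Counter) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
  Value *Old = Builder.CreateLoad(Counter, "cnt");
  Builder.CreateStore(Builder.CreateAdd(Old, Builder.getInt32(1)), Counter);
}
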
+LandingPadInst *BasicBlock::getLandingPadInst() { + return dyn_cast<LandingPadInst>(getFirstNonPHI()); +} diff --git a/contrib/llvm/lib/VMCore/ConstantFold.cpp b/contrib/llvm/lib/VMCore/ConstantFold.cpp index 323e2a2..30bae71 100644 --- a/contrib/llvm/lib/VMCore/ConstantFold.cpp +++ b/contrib/llvm/lib/VMCore/ConstantFold.cpp @@ -42,7 +42,7 @@ using namespace llvm; /// specified vector type. At this point, we know that the elements of the /// input vector constant are all simple integer or FP values. static Constant *BitCastConstantVector(ConstantVector *CV, - const VectorType *DstTy) { + VectorType *DstTy) { if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy); if (CV->isNullValue()) return Constant::getNullValue(DstTy); @@ -63,7 +63,7 @@ static Constant *BitCastConstantVector(ConstantVector *CV, // Bitcast each element now. std::vector<Constant*> Result; - const Type *DstEltTy = DstTy->getElementType(); + Type *DstEltTy = DstTy->getElementType(); for (unsigned i = 0; i != NumElts; ++i) Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i), DstEltTy)); @@ -78,15 +78,15 @@ static unsigned foldConstantCastPair( unsigned opc, ///< opcode of the second cast constant expression ConstantExpr *Op, ///< the first cast constant expression - const Type *DstTy ///< desintation type of the first cast + Type *DstTy ///< desintation type of the first cast ) { assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!"); assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type"); assert(CastInst::isCast(opc) && "Invalid cast opcode"); // The the types and opcodes for the two Cast constant expressions - const Type *SrcTy = Op->getOperand(0)->getType(); - const Type *MidTy = Op->getType(); + Type *SrcTy = Op->getOperand(0)->getType(); + Type *MidTy = Op->getType(); Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opc); @@ -95,27 +95,27 @@ foldConstantCastPair( Type::getInt64Ty(DstTy->getContext())); } -static Constant *FoldBitCast(Constant *V, const Type *DestTy) { - const Type *SrcTy = V->getType(); +static Constant *FoldBitCast(Constant *V, Type *DestTy) { + Type *SrcTy = V->getType(); if (SrcTy == DestTy) return V; // no-op cast // Check to see if we are casting a pointer to an aggregate to a pointer to // the first element. If so, return the appropriate GEP instruction. - if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) - if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy)) + if (PointerType *PTy = dyn_cast<PointerType>(V->getType())) + if (PointerType *DPTy = dyn_cast<PointerType>(DestTy)) if (PTy->getAddressSpace() == DPTy->getAddressSpace()) { SmallVector<Value*, 8> IdxList; Value *Zero = Constant::getNullValue(Type::getInt32Ty(DPTy->getContext())); IdxList.push_back(Zero); - const Type *ElTy = PTy->getElementType(); + Type *ElTy = PTy->getElementType(); while (ElTy != DPTy->getElementType()) { - if (const StructType *STy = dyn_cast<StructType>(ElTy)) { + if (StructType *STy = dyn_cast<StructType>(ElTy)) { if (STy->getNumElements() == 0) break; ElTy = STy->getElementType(0); IdxList.push_back(Zero); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast<SequentialType>(ElTy)) { if (ElTy->isPointerTy()) break; // Can't index into pointers! 
ElTy = STy->getElementType(); @@ -127,14 +127,13 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { if (ElTy == DPTy->getElementType()) // This GEP is inbounds because all indices are zero. - return ConstantExpr::getInBoundsGetElementPtr(V, &IdxList[0], - IdxList.size()); + return ConstantExpr::getInBoundsGetElementPtr(V, IdxList); } // Handle casts from one vector constant to another. We know that the src // and dest type have the same size (otherwise its an illegal cast). - if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) { - if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) { + if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) { + if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) { assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() && "Not cast between same sized vectors!"); SrcTy = NULL; @@ -332,15 +331,15 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, /// return null if no factoring was possible, to avoid endlessly /// bouncing an unfoldable expression back into the top-level folder. /// -static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, +static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); return ConstantExpr::getNUWMul(E, N); } - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); // An empty struct has size zero. @@ -364,7 +363,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, // Pointer size doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. - if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), @@ -389,11 +388,11 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, /// return null if no factoring was possible, to avoid endlessly /// bouncing an unfoldable expression back into the top-level folder. /// -static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, +static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { // The alignment of an array is equal to the alignment of the // array element. Note that this is not always true for vectors. - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, DestTy, @@ -402,7 +401,7 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, return C; } - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { // Packed structs always have an alignment of 1. if (STy->isPacked()) return ConstantInt::get(DestTy, 1); @@ -429,7 +428,7 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, // Pointer alignment doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. 
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), @@ -455,10 +454,10 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, /// return null if no factoring was possible, to avoid endlessly /// bouncing an unfoldable expression back into the top-level folder. /// -static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, - const Type *DestTy, +static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, + Type *DestTy, bool Folded) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, DestTy, false), FieldNo, DestTy); @@ -466,7 +465,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, return ConstantExpr::getNUWMul(E, N); } - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); // An empty struct has no members. @@ -506,7 +505,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, } Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, - const Type *DestTy) { + Type *DestTy) { if (isa<UndefValue>(V)) { // zext(undef) = 0, because the top bits will be zero. // sext(undef) = 0, because the top bits will all be the same. @@ -554,8 +553,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, cast<VectorType>(DestTy)->getNumElements() == CV->getType()->getNumElements()) { std::vector<Constant*> res; - const VectorType *DestVecTy = cast<VectorType>(DestTy); - const Type *DstEltTy = DestVecTy->getElementType(); + VectorType *DestVecTy = cast<VectorType>(DestTy); + Type *DstEltTy = DestVecTy->getElementType(); for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); @@ -590,7 +589,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth(); (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI, APFloat::rmTowardZero, &ignored); - APInt Val(DestBitWidth, 2, x); + APInt Val(DestBitWidth, x); return ConstantInt::get(FPC->getContext(), Val); } return 0; // Can't fold. @@ -608,7 +607,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) if (CE->getOpcode() == Instruction::GetElementPtr && CE->getOperand(0)->isNullValue()) { - const Type *Ty = + Type *Ty = cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); if (CE->getNumOperands() == 2) { // Handle a sizeof-like expression. @@ -623,7 +622,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, } else if (CE->getNumOperands() == 3 && CE->getOperand(1)->isNullValue()) { // Handle an alignof-like expression. 
- if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); if (CI->isOne() && @@ -701,7 +700,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, if (CondV->isAllOnesValue()) return V1; - const VectorType *VTy = cast<VectorType>(V1->getType()); + VectorType *VTy = cast<VectorType>(V1->getType()); ConstantVector *CP1 = dyn_cast<ConstantVector>(V1); ConstantVector *CP2 = dyn_cast<ConstantVector>(V2); @@ -709,7 +708,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, (CP2 || isa<ConstantAggregateZero>(V2))) { // Find the element type of the returned vector - const Type *EltTy = VTy->getElementType(); + Type *EltTy = VTy->getElementType(); unsigned NumElem = VTy->getNumElements(); std::vector<Constant*> Res(NumElem); @@ -762,10 +761,14 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) { if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) { + uint64_t Index = CIdx->getZExtValue(); + if (Index >= CVal->getNumOperands()) + // ee({w,x,y,z}, wrong_value) -> undef + return UndefValue::get(cast<VectorType>(Val->getType())->getElementType()); return CVal->getOperand(CIdx->getZExtValue()); } else if (isa<UndefValue>(Idx)) { - // ee({w,x,y,z}, undef) -> w (an arbitrary value). - return CVal->getOperand(0); + // ee({w,x,y,z}, undef) -> undef + return UndefValue::get(cast<VectorType>(Val->getType())->getElementType()); } } return 0; @@ -834,7 +837,7 @@ static Constant *GetVectorElement(Constant *C, unsigned EltNo) { if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) return CV->getOperand(EltNo); - const Type *EltTy = cast<VectorType>(C->getType())->getElementType(); + Type *EltTy = cast<VectorType>(C->getType())->getElementType(); if (isa<ConstantAggregateZero>(C)) return Constant::getNullValue(EltTy); if (isa<UndefValue>(C)) @@ -850,7 +853,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements(); unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements(); - const Type *EltTy = cast<VectorType>(V1->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V1->getType())->getElementType(); // Loop over the shuffle mask, evaluating each element. SmallVector<Constant*, 32> Result; @@ -922,16 +925,16 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, // Otherwise break the aggregate undef into multiple undefs and do // the insertion. - const CompositeType *AggTy = cast<CompositeType>(Agg->getType()); + CompositeType *AggTy = cast<CompositeType>(Agg->getType()); unsigned numOps; - if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) + if (ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); else numOps = cast<StructType>(AggTy)->getNumElements(); std::vector<Constant*> Ops(numOps); for (unsigned i = 0; i < numOps; ++i) { - const Type *MemberTy = AggTy->getTypeAtIndex(i); + Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (Idxs[0] == i) ? 
ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy), @@ -940,7 +943,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Ops[i] = Op; } - if (const StructType* ST = dyn_cast<StructType>(AggTy)) + if (StructType* ST = dyn_cast<StructType>(AggTy)) return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -953,16 +956,16 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, // Otherwise break the aggregate zero into multiple zeros and do // the insertion. - const CompositeType *AggTy = cast<CompositeType>(Agg->getType()); + CompositeType *AggTy = cast<CompositeType>(Agg->getType()); unsigned numOps; - if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) + if (ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); else numOps = cast<StructType>(AggTy)->getNumElements(); std::vector<Constant*> Ops(numOps); for (unsigned i = 0; i < numOps; ++i) { - const Type *MemberTy = AggTy->getTypeAtIndex(i); + Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (Idxs[0] == i) ? ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy), @@ -971,7 +974,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Ops[i] = Op; } - if (const StructType *ST = dyn_cast<StructType>(AggTy)) + if (StructType *ST = dyn_cast<StructType>(AggTy)) return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -986,7 +989,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Ops[i] = Op; } - if (const StructType* ST = dyn_cast<StructType>(Agg->getType())) + if (StructType* ST = dyn_cast<StructType>(Agg->getType())) return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops); } @@ -1265,13 +1268,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return ConstantFP::get(C1->getContext(), C3V); } } - } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) { + } else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) { ConstantVector *CP1 = dyn_cast<ConstantVector>(C1); ConstantVector *CP2 = dyn_cast<ConstantVector>(C2); if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) && (CP2 != NULL || isa<ConstantAggregateZero>(C2))) { std::vector<Constant*> Res; - const Type* EltTy = VTy->getElementType(); + Type* EltTy = VTy->getElementType(); Constant *C1 = 0; Constant *C2 = 0; switch (Opcode) { @@ -1461,8 +1464,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, /// isZeroSizedType - This type is zero sized if its an array or structure of /// zero sized types. The only leaf zero sized type is an empty structure. -static bool isMaybeZeroSizedType(const Type *Ty) { - if (const StructType *STy = dyn_cast<StructType>(Ty)) { +static bool isMaybeZeroSizedType(Type *Ty) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { if (STy->isOpaque()) return true; // Can't say. // If all of elements have zero size, this does too. @@ -1470,7 +1473,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) { if (!isMaybeZeroSizedType(STy->getElementType(i))) return false; return true; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { return isMaybeZeroSizedType(ATy->getElementType()); } return false; @@ -1483,7 +1486,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) { /// first is less than the second, return -1, if the second is less than the /// first, return 1. If the constants are not integral, return -2. 
/// -static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) { +static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) { if (C1 == C2) return 0; // Ok, we found a different index. If they are not ConstantInt, we can't do @@ -1832,8 +1835,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, Constant *C1, Constant *C2) { - const Type *ResultTy; - if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) + Type *ResultTy; + if (VectorType *VT = dyn_cast<VectorType>(C1->getType())) ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()), VT->getNumElements()); else @@ -2146,9 +2149,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, /// isInBoundsIndices - Test whether the given sequence of *normalized* indices /// is "inbounds". template<typename IndexTy> -static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) { +static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) { // No indices means nothing that could be out of bounds. - if (NumIdx == 0) return true; + if (Idxs.empty()) return true; // If the first index is zero, it's in bounds. if (cast<Constant>(Idxs[0])->isNullValue()) return true; @@ -2157,7 +2160,7 @@ static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) { // by the one-past-the-end rule. if (!cast<ConstantInt>(Idxs[0])->isOne()) return false; - for (unsigned i = 1, e = NumIdx; i != e; ++i) + for (unsigned i = 1, e = Idxs.size(); i != e; ++i) if (!cast<Constant>(Idxs[i])->isNullValue()) return false; return true; @@ -2166,31 +2169,29 @@ static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) { template<typename IndexTy> static Constant *ConstantFoldGetElementPtrImpl(Constant *C, bool inBounds, - IndexTy const *Idxs, - unsigned NumIdx) { - if (NumIdx == 0) return C; + ArrayRef<IndexTy> Idxs) { + if (Idxs.empty()) return C; Constant *Idx0 = cast<Constant>(Idxs[0]); - if ((NumIdx == 1 && Idx0->isNullValue())) + if ((Idxs.size() == 1 && Idx0->isNullValue())) return C; if (isa<UndefValue>(C)) { - const PointerType *Ptr = cast<PointerType>(C->getType()); - const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx); + PointerType *Ptr = cast<PointerType>(C->getType()); + Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); assert(Ty != 0 && "Invalid indices for GEP!"); return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace())); } if (C->isNullValue()) { bool isNull = true; - for (unsigned i = 0, e = NumIdx; i != e; ++i) + for (unsigned i = 0, e = Idxs.size(); i != e; ++i) if (!cast<Constant>(Idxs[i])->isNullValue()) { isNull = false; break; } if (isNull) { - const PointerType *Ptr = cast<PointerType>(C->getType()); - const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, - Idxs+NumIdx); + PointerType *Ptr = cast<PointerType>(C->getType()); + Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); assert(Ty != 0 && "Invalid indices for GEP!"); return ConstantPointerNull::get(PointerType::get(Ty, Ptr->getAddressSpace())); @@ -2203,14 +2204,14 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, // getelementptr instructions into a single instruction. 
// if (CE->getOpcode() == Instruction::GetElementPtr) { - const Type *LastTy = 0; + Type *LastTy = 0; for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); I != E; ++I) LastTy = *I; if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) { SmallVector<Value*, 16> NewIndices; - NewIndices.reserve(NumIdx + CE->getNumOperands()); + NewIndices.reserve(Idxs.size() + CE->getNumOperands()); for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i) NewIndices.push_back(CE->getOperand(i)); @@ -2219,9 +2220,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, Constant *Combined = CE->getOperand(CE->getNumOperands()-1); // Otherwise it must be an array. if (!Idx0->isNullValue()) { - const Type *IdxTy = Combined->getType(); + Type *IdxTy = Combined->getType(); if (IdxTy != Idx0->getType()) { - const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext()); + Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext()); Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty); Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty); Combined = ConstantExpr::get(Instruction::Add, C1, C2); @@ -2232,14 +2233,11 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, } NewIndices.push_back(Combined); - NewIndices.append(Idxs+1, Idxs+NumIdx); - return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ? - ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0), - &NewIndices[0], - NewIndices.size()) : - ConstantExpr::getGetElementPtr(CE->getOperand(0), - &NewIndices[0], - NewIndices.size()); + NewIndices.append(Idxs.begin() + 1, Idxs.end()); + return + ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices, + inBounds && + cast<GEPOperator>(CE)->isInBounds()); } } @@ -2248,18 +2246,16 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, // i64 0, i64 0) // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0) // - if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) { - if (const PointerType *SPT = + if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) { + if (PointerType *SPT = dyn_cast<PointerType>(CE->getOperand(0)->getType())) - if (const ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType())) - if (const ArrayType *CAT = + if (ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType())) + if (ArrayType *CAT = dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType())) if (CAT->getElementType() == SAT->getElementType()) - return inBounds ? - ConstantExpr::getInBoundsGetElementPtr( - (Constant*)CE->getOperand(0), Idxs, NumIdx) : - ConstantExpr::getGetElementPtr( - (Constant*)CE->getOperand(0), Idxs, NumIdx); + return + ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0), + Idxs, inBounds); } } @@ -2268,19 +2264,19 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, // out into preceding dimensions. bool Unknown = false; SmallVector<Constant *, 8> NewIdxs; - const Type *Ty = C->getType(); - const Type *Prev = 0; - for (unsigned i = 0; i != NumIdx; + Type *Ty = C->getType(); + Type *Prev = 0; + for (unsigned i = 0, e = Idxs.size(); i != e; Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) if (ATy->getNumElements() <= INT64_MAX && ATy->getNumElements() != 0 && CI->getSExtValue() >= (int64_t)ATy->getNumElements()) { if (isa<SequentialType>(Prev)) { // It's out of range, but we can factor it into the prior // dimension. 
- NewIdxs.resize(NumIdx); + NewIdxs.resize(Idxs.size()); ConstantInt *Factor = ConstantInt::get(CI->getType(), ATy->getNumElements()); NewIdxs[i] = ConstantExpr::getSRem(CI, Factor); @@ -2312,33 +2308,28 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, // If we did any factoring, start over with the adjusted indices. if (!NewIdxs.empty()) { - for (unsigned i = 0; i != NumIdx; ++i) + for (unsigned i = 0, e = Idxs.size(); i != e; ++i) if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]); - return inBounds ? - ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(), - NewIdxs.size()) : - ConstantExpr::getGetElementPtr(C, NewIdxs.data(), NewIdxs.size()); + return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds); } // If all indices are known integers and normalized, we can do a simple // check for the "inbounds" property. if (!Unknown && !inBounds && - isa<GlobalVariable>(C) && isInBoundsIndices(Idxs, NumIdx)) - return ConstantExpr::getInBoundsGetElementPtr(C, Idxs, NumIdx); + isa<GlobalVariable>(C) && isInBoundsIndices(Idxs)) + return ConstantExpr::getInBoundsGetElementPtr(C, Idxs); return 0; } Constant *llvm::ConstantFoldGetElementPtr(Constant *C, bool inBounds, - Constant* const *Idxs, - unsigned NumIdx) { - return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx); + ArrayRef<Constant *> Idxs) { + return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs); } Constant *llvm::ConstantFoldGetElementPtr(Constant *C, bool inBounds, - Value* const *Idxs, - unsigned NumIdx) { - return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx); + ArrayRef<Value *> Idxs) { + return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs); } diff --git a/contrib/llvm/lib/VMCore/ConstantFold.h b/contrib/llvm/lib/VMCore/ConstantFold.h index 653a1c3..e12f27a 100644 --- a/contrib/llvm/lib/VMCore/ConstantFold.h +++ b/contrib/llvm/lib/VMCore/ConstantFold.h @@ -30,7 +30,7 @@ namespace llvm { Constant *ConstantFoldCastInstruction( unsigned opcode, ///< The opcode of the cast Constant *V, ///< The source constant - const Type *DestTy ///< The destination type + Type *DestTy ///< The destination type ); Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2); @@ -48,9 +48,9 @@ namespace llvm { Constant *ConstantFoldCompareInstruction(unsigned short predicate, Constant *C1, Constant *C2); Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds, - Constant* const *Idxs, unsigned NumIdx); + ArrayRef<Constant *> Idxs); Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds, - Value* const *Idxs, unsigned NumIdx); + ArrayRef<Value *> Idxs); } // End llvm namespace #endif diff --git a/contrib/llvm/lib/VMCore/Constants.cpp b/contrib/llvm/lib/VMCore/Constants.cpp index 316c884..a84a046 100644 --- a/contrib/llvm/lib/VMCore/Constants.cpp +++ b/contrib/llvm/lib/VMCore/Constants.cpp @@ -62,8 +62,23 @@ bool Constant::isNullValue() const { return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this); } +bool Constant::isAllOnesValue() const { + // Check for -1 integers + if (const ConstantInt *CI = dyn_cast<ConstantInt>(this)) + return CI->isMinusOne(); + + // Check for FP which are bitcasted from -1 integers + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) + return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue(); + + // Check for constant vectors + if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) + return CV->isAllOnesValue(); + + return false; +} // Constructor to create a '0' constant of arbitrary type... 
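The Constants.cpp hunk above introduces Constant::isAllOnesValue(), which recognizes -1 integers, floats whose bit pattern is all ones, and all-ones vector splats behind one query. A small usage sketch under that assumption; isMaskAllSet and demoAllOnes are illustrative names only:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// A caller no longer needs to special-case ConstantInt versus
// ConstantVector when testing a constant for ~0.
static bool isMaskAllSet(Constant *Mask) {
  return Mask->isAllOnesValue();
}

static void demoAllOnes(LLVMContext &Ctx) {
  Constant *MinusOne = ConstantInt::get(Type::getInt32Ty(Ctx), -1, /*isSigned=*/true);
  Constant *Splat = Constant::getAllOnesValue(VectorType::get(Type::getInt8Ty(Ctx), 4));
  (void)isMaskAllSet(MinusOne); // true: i32 -1
  (void)isMaskAllSet(Splat);    // true: <4 x i8> splat of -1
}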
-Constant *Constant::getNullValue(const Type *Ty) { +Constant *Constant::getNullValue(Type *Ty) { switch (Ty->getTypeID()) { case Type::IntegerTyID: return ConstantInt::get(Ty, 0); @@ -90,30 +105,30 @@ Constant *Constant::getNullValue(const Type *Ty) { return ConstantAggregateZero::get(Ty); default: // Function, Label, or Opaque type? - assert(!"Cannot create a null constant of that type!"); + assert(0 && "Cannot create a null constant of that type!"); return 0; } } -Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) { - const Type *ScalarTy = Ty->getScalarType(); +Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) { + Type *ScalarTy = Ty->getScalarType(); // Create the base integer constant. Constant *C = ConstantInt::get(Ty->getContext(), V); // Convert an integer to a pointer, if necessary. - if (const PointerType *PTy = dyn_cast<PointerType>(ScalarTy)) + if (PointerType *PTy = dyn_cast<PointerType>(ScalarTy)) C = ConstantExpr::getIntToPtr(C, PTy); // Broadcast a scalar to a vector, if necessary. - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); return C; } -Constant *Constant::getAllOnesValue(const Type *Ty) { - if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) +Constant *Constant::getAllOnesValue(Type *Ty) { + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) return ConstantInt::get(Ty->getContext(), APInt::getAllOnesValue(ITy->getBitWidth())); @@ -124,9 +139,9 @@ Constant *Constant::getAllOnesValue(const Type *Ty) { } SmallVector<Constant*, 16> Elts; - const VectorType *VTy = cast<VectorType>(Ty); + VectorType *VTy = cast<VectorType>(Ty); Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); - assert(Elts[0] && "Not a vector integer type!"); + assert(Elts[0] && "Invalid AllOnes value!"); return cast<ConstantVector>(ConstantVector::get(Elts)); } @@ -269,7 +284,7 @@ void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const { return; } - const VectorType *VT = cast<VectorType>(getType()); + VectorType *VT = cast<VectorType>(getType()); if (isa<ConstantAggregateZero>(this)) { Elts.assign(VT->getNumElements(), Constant::getNullValue(VT->getElementType())); @@ -343,7 +358,7 @@ void Constant::removeDeadConstantUsers() const { // ConstantInt //===----------------------------------------------------------------------===// -ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V) +ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V) : Constant(Ty, ConstantIntVal, 0, 0), Val(V) { assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); } @@ -362,8 +377,8 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { return pImpl->TheFalseVal; } -Constant *ConstantInt::getTrue(const Type *Ty) { - const VectorType *VTy = dyn_cast<VectorType>(Ty); +Constant *ConstantInt::getTrue(Type *Ty) { + VectorType *VTy = dyn_cast<VectorType>(Ty); if (!VTy) { assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1."); return ConstantInt::getTrue(Ty->getContext()); @@ -375,8 +390,8 @@ Constant *ConstantInt::getTrue(const Type *Ty) { return ConstantVector::get(Splat); } -Constant *ConstantInt::getFalse(const Type *Ty) { - const VectorType *VTy = dyn_cast<VectorType>(Ty); +Constant *ConstantInt::getFalse(Type *Ty) { + VectorType *VTy = dyn_cast<VectorType>(Ty); if (!VTy) { assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1."); return 
ConstantInt::getFalse(Ty->getContext()); @@ -396,7 +411,7 @@ Constant *ConstantInt::getFalse(const Type *Ty) { // invariant which generates an assertion. ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) { // Get the corresponding integer type for the bit width of the value. - const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); + IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); // get an existing value or the insertion position DenseMapAPIntKeyInfo::KeyTy Key(V, ITy); ConstantInt *&Slot = Context.pImpl->IntConstants[Key]; @@ -404,44 +419,44 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) { return Slot; } -Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { +Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) { Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned); // For vectors, broadcast the value. - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::get(SmallVector<Constant*, 16>(VTy->getNumElements(), C)); return C; } -ConstantInt* ConstantInt::get(const IntegerType* Ty, uint64_t V, +ConstantInt* ConstantInt::get(IntegerType* Ty, uint64_t V, bool isSigned) { return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned)); } -ConstantInt* ConstantInt::getSigned(const IntegerType* Ty, int64_t V) { +ConstantInt* ConstantInt::getSigned(IntegerType* Ty, int64_t V) { return get(Ty, V, true); } -Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) { +Constant *ConstantInt::getSigned(Type *Ty, int64_t V) { return get(Ty, V, true); } -Constant *ConstantInt::get(const Type* Ty, const APInt& V) { +Constant *ConstantInt::get(Type* Ty, const APInt& V) { ConstantInt *C = get(Ty->getContext(), V); assert(C->getType() == Ty->getScalarType() && "ConstantInt type doesn't match the type implied by its value!"); // For vectors, broadcast the value. - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::get( SmallVector<Constant *, 16>(VTy->getNumElements(), C)); return C; } -ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str, +ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str, uint8_t radix) { return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix)); } @@ -450,7 +465,7 @@ ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str, // ConstantFP //===----------------------------------------------------------------------===// -static const fltSemantics *TypeToFloatSemantics(const Type *Ty) { +static const fltSemantics *TypeToFloatSemantics(Type *Ty) { if (Ty->isFloatTy()) return &APFloat::IEEEsingle; if (Ty->isDoubleTy()) @@ -467,7 +482,7 @@ static const fltSemantics *TypeToFloatSemantics(const Type *Ty) { /// get() - This returns a constant fp for the specified value in the /// specified type. This should only be used for simple constant values like /// 2.0/1.0 etc, that are known-valid both as double and as the target format. -Constant *ConstantFP::get(const Type* Ty, double V) { +Constant *ConstantFP::get(Type* Ty, double V) { LLVMContext &Context = Ty->getContext(); APFloat FV(V); @@ -477,7 +492,7 @@ Constant *ConstantFP::get(const Type* Ty, double V) { Constant *C = get(Context, FV); // For vectors, broadcast the value. 
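Several of the ConstantInt::get and ConstantFP::get overloads touched here keep the behaviour noted in the comment above: handed a vector type, they broadcast the scalar constant across every lane. A short sketch of that splat behaviour, assuming only the APIs visible in these hunks; makeSplat7 is an invented helper name:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Passing a vector type yields a splat ConstantVector rather than a scalar.
static Constant *makeSplat7(LLVMContext &Ctx) {
  Type *V4i32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  return ConstantInt::get(V4i32, 7);  // <4 x i32> <i32 7, i32 7, i32 7, i32 7>
}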
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::get( SmallVector<Constant *, 16>(VTy->getNumElements(), C)); @@ -485,14 +500,14 @@ Constant *ConstantFP::get(const Type* Ty, double V) { } -Constant *ConstantFP::get(const Type* Ty, StringRef Str) { +Constant *ConstantFP::get(Type* Ty, StringRef Str) { LLVMContext &Context = Ty->getContext(); APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str); Constant *C = get(Context, FV); // For vectors, broadcast the value. - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::get( SmallVector<Constant *, 16>(VTy->getNumElements(), C)); @@ -500,7 +515,7 @@ Constant *ConstantFP::get(const Type* Ty, StringRef Str) { } -ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { +ConstantFP* ConstantFP::getNegativeZero(Type* Ty) { LLVMContext &Context = Ty->getContext(); APFloat apf = cast <ConstantFP>(Constant::getNullValue(Ty))->getValueAPF(); apf.changeSign(); @@ -508,8 +523,8 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { } -Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) { - if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) +Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { + if (VectorType *PTy = dyn_cast<VectorType>(Ty)) if (PTy->getElementType()->isFloatingPointTy()) { SmallVector<Constant*, 16> zeros(PTy->getNumElements(), getNegativeZero(PTy->getElementType())); @@ -532,7 +547,7 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { ConstantFP *&Slot = pImpl->FPConstants[Key]; if (!Slot) { - const Type *Ty; + Type *Ty; if (&V.getSemantics() == &APFloat::IEEEsingle) Ty = Type::getFloatTy(Context); else if (&V.getSemantics() == &APFloat::IEEEdouble) @@ -552,13 +567,13 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { return Slot; } -ConstantFP *ConstantFP::getInfinity(const Type *Ty, bool Negative) { +ConstantFP *ConstantFP::getInfinity(Type *Ty, bool Negative) { const fltSemantics &Semantics = *TypeToFloatSemantics(Ty); return ConstantFP::get(Ty->getContext(), APFloat::getInf(Semantics, Negative)); } -ConstantFP::ConstantFP(const Type *Ty, const APFloat& V) +ConstantFP::ConstantFP(Type *Ty, const APFloat& V) : Constant(Ty, ConstantFPVal, 0, 0), Val(V) { assert(&V.getSemantics() == TypeToFloatSemantics(Ty) && "FP type Mismatch"); @@ -573,24 +588,19 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const { //===----------------------------------------------------------------------===// -ConstantArray::ConstantArray(const ArrayType *T, - const std::vector<Constant*> &V) +ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V) : Constant(T, ConstantArrayVal, OperandTraits<ConstantArray>::op_end(this) - V.size(), V.size()) { assert(V.size() == T->getNumElements() && "Invalid initializer vector for constant array"); - Use *OL = OperandList; - for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end(); - I != E; ++I, ++OL) { - Constant *C = *I; - assert(C->getType() == T->getElementType() && + for (unsigned i = 0, e = V.size(); i != e; ++i) + assert(V[i]->getType() == T->getElementType() && "Initializer for array element doesn't match array element type!"); - *OL = C; - } + std::copy(V.begin(), V.end(), op_begin()); } -Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) { +Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) { for (unsigned i = 0, e = 
V.size(); i != e; ++i) { assert(V[i]->getType() == Ty->getElementType() && "Wrong type in array element initializer"); @@ -653,25 +663,20 @@ StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V, } -ConstantStruct::ConstantStruct(const StructType *T, - const std::vector<Constant*> &V) +ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V) : Constant(T, ConstantStructVal, OperandTraits<ConstantStruct>::op_end(this) - V.size(), V.size()) { - assert((T->isOpaque() || V.size() == T->getNumElements()) && + assert(V.size() == T->getNumElements() && "Invalid initializer vector for constant structure"); - Use *OL = OperandList; - for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end(); - I != E; ++I, ++OL) { - Constant *C = *I; - assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) && + for (unsigned i = 0, e = V.size(); i != e; ++i) + assert((T->isOpaque() || V[i]->getType() == T->getElementType(i)) && "Initializer for struct element doesn't match struct element type!"); - *OL = C; - } + std::copy(V.begin(), V.end(), op_begin()); } // ConstantStruct accessors. -Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) { +Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) { // Create a ConstantAggregateZero value if all elements are zeros. for (unsigned i = 0, e = V.size(); i != e; ++i) if (!V[i]->isNullValue()) @@ -682,7 +687,7 @@ Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) { return ConstantAggregateZero::get(ST); } -Constant* ConstantStruct::get(const StructType *T, ...) { +Constant *ConstantStruct::get(StructType *T, ...) { va_list ap; SmallVector<Constant*, 8> Values; va_start(ap, T); @@ -692,25 +697,20 @@ Constant* ConstantStruct::get(const StructType *T, ...) { return get(T, Values); } -ConstantVector::ConstantVector(const VectorType *T, - const std::vector<Constant*> &V) +ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V) : Constant(T, ConstantVectorVal, OperandTraits<ConstantVector>::op_end(this) - V.size(), V.size()) { - Use *OL = OperandList; - for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end(); - I != E; ++I, ++OL) { - Constant *C = *I; - assert(C->getType() == T->getElementType() && + for (size_t i = 0, e = V.size(); i != e; i++) + assert(V[i]->getType() == T->getElementType() && "Initializer for vector element doesn't match vector element type!"); - *OL = C; - } + std::copy(V.begin(), V.end(), op_begin()); } // ConstantVector accessors. Constant *ConstantVector::get(ArrayRef<Constant*> V) { assert(!V.empty() && "Vectors can't be empty"); - const VectorType *T = VectorType::get(V.front()->getType(), V.size()); + VectorType *T = VectorType::get(V.front()->getType(), V.size()); LLVMContextImpl *pImpl = T->getContext().pImpl; // If this is an all-undef or all-zero vector, return a @@ -761,7 +761,7 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const { for (; GEPI != E; ++GEPI, ++OI) { ConstantInt *CI = dyn_cast<ConstantInt>(*OI); if (!CI) return false; - if (const ArrayType *ATy = dyn_cast<ArrayType>(*GEPI)) + if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI)) if (CI->getValue().getActiveBits() > 64 || CI->getZExtValue() >= ATy->getNumElements()) return false; @@ -839,13 +839,13 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { for (unsigned i = 1, e = getNumOperands(); i != e; ++i) Ops[i-1] = getOperand(i); if (OpNo == 0) - return cast<GEPOperator>(this)->isInBounds() ? 
- ConstantExpr::getInBoundsGetElementPtr(Op, &Ops[0], Ops.size()) : - ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size()); + return + ConstantExpr::getGetElementPtr(Op, Ops, + cast<GEPOperator>(this)->isInBounds()); Ops[OpNo-1] = Op; - return cast<GEPOperator>(this)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(getOperand(0), &Ops[0],Ops.size()): - ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size()); + return + ConstantExpr::getGetElementPtr(getOperand(0), Ops, + cast<GEPOperator>(this)->isInBounds()); } default: assert(getNumOperands() == 2 && "Must be binary operator?"); @@ -859,7 +859,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { /// operands replaced with the specified values. The specified array must /// have the same number of operands as our current one. Constant *ConstantExpr:: -getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const { +getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const { assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); bool AnyChange = Ty != getType(); for (unsigned i = 0; i != Ops.size(); ++i) @@ -891,9 +891,9 @@ getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const { case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - return cast<GEPOperator>(this)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) : - ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1); + return + ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1), + cast<GEPOperator>(this)->isInBounds()); case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]); @@ -907,7 +907,7 @@ getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const { //===----------------------------------------------------------------------===// // isValueValidForType implementations -bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) { +bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) { unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay if (Ty == Type::getInt1Ty(Ty->getContext())) return Val == 0 || Val == 1; @@ -917,7 +917,7 @@ bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) { return Val <= Max; } -bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) { +bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) { unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay if (Ty == Type::getInt1Ty(Ty->getContext())) return Val == 0 || Val == 1 || Val == -1; @@ -928,7 +928,7 @@ bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) { return (Val >= Min && Val <= Max); } -bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) { +bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) { // convert modifies in place, so make a copy. 
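Both getWithOperandReplaced and getWithOperands above now funnel GEP rebuilding through a single getGetElementPtr(C, Idxs, InBounds) call instead of selecting between getGetElementPtr and getInBoundsGetElementPtr. A hedged sketch of the new call shape, assuming the ArrayRef<Constant*> convenience overload declared alongside the ArrayRef<Value*> form in Constants.h; firstField and GlobalPtr (a pointer to an aggregate) are illustrative:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// The inbounds property is now an argument rather than a second entry point.
static Constant *firstField(Constant *GlobalPtr, LLVMContext &Ctx) {
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  Constant *Idxs[] = { Zero, Zero };   // GlobalPtr must point to an aggregate
  return ConstantExpr::getGetElementPtr(GlobalPtr, Idxs, /*InBounds=*/true);
}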
APFloat Val2 = APFloat(Val); bool losesInfo; @@ -968,7 +968,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) { //===----------------------------------------------------------------------===// // Factory Function Implementation -ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) { +ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) { assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); @@ -1079,13 +1079,16 @@ bool ConstantVector::isAllOnesValue() const { // Check out first element. const Constant *Elt = getOperand(0); const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); - if (!CI || !CI->isAllOnesValue()) return false; + const ConstantFP *CF = dyn_cast<ConstantFP>(Elt); + // Then make sure all remaining elements point to the same value. for (unsigned I = 1, E = getNumOperands(); I < E; ++I) if (getOperand(I) != Elt) return false; - return true; + // First value is all-ones. + return (CI && CI->isAllOnesValue()) || + (CF && CF->isAllOnesValue()); } /// getSplatValue - If this is a splat constant, where all of the @@ -1103,7 +1106,7 @@ Constant *ConstantVector::getSplatValue() const { //---- ConstantPointerNull::get() implementation. // -ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) { +ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0); } @@ -1118,7 +1121,7 @@ void ConstantPointerNull::destroyConstant() { //---- UndefValue::get() implementation. // -UndefValue *UndefValue::get(const Type *Ty) { +UndefValue *UndefValue::get(Type *Ty) { return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0); } @@ -1209,7 +1212,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { /// This is a utility function to handle folding of casts and lookup of the /// cast in the ExprConstants map. It is used by the various get* methods below. 
static inline Constant *getFoldedCast( - Instruction::CastOps opc, Constant *C, const Type *Ty) { + Instruction::CastOps opc, Constant *C, Type *Ty) { assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!"); // Fold a few common cases if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty)) @@ -1224,7 +1227,7 @@ static inline Constant *getFoldedCast( return pImpl->ExprConstants.getOrCreate(Ty, Key); } -Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { +Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) { Instruction::CastOps opc = Instruction::CastOps(oc); assert(Instruction::isCast(opc) && "opcode out of range"); assert(C && Ty && "Null arguments to getCast"); @@ -1250,25 +1253,25 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { return 0; } -Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getBitCast(C, Ty); return getZExt(C, Ty); } -Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getBitCast(C, Ty); return getSExt(C, Ty); } -Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getBitCast(C, Ty); return getTrunc(C, Ty); } -Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { +Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) { assert(S->getType()->isPointerTy() && "Invalid cast"); assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); @@ -1277,7 +1280,7 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { return getBitCast(S, Ty); } -Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, +Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, bool isSigned) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); @@ -1290,7 +1293,7 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, return getCast(opcode, C, Ty); } -Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) { assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); @@ -1302,7 +1305,7 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { return getCast(opcode, C, Ty); } -Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1316,7 +1319,7 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::Trunc, C, Ty); } -Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1330,7 +1333,7 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::SExt, C, Ty); } -Constant 
*ConstantExpr::getZExt(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1344,7 +1347,7 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::ZExt, C, Ty); } -Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1356,7 +1359,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::FPTrunc, C, Ty); } -Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1368,7 +1371,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::FPExt, C, Ty); } -Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1379,7 +1382,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::UIToFP, C, Ty); } -Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1390,7 +1393,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::SIToFP, C, Ty); } -Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1401,7 +1404,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::FPToUI, C, Ty); } -Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { +Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) { #ifndef NDEBUG bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; bool toVec = Ty->getTypeID() == Type::VectorTyID; @@ -1412,19 +1415,19 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { return getFoldedCast(Instruction::FPToSI, C, Ty); } -Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) { +Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) { assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer"); assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } -Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) { +Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) { assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } -Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) { +Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) { 
assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) && "Invalid constantexpr bitcast!"); @@ -1513,36 +1516,36 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, return pImpl->ExprConstants.getOrCreate(C1->getType(), Key); } -Constant *ConstantExpr::getSizeOf(const Type* Ty) { +Constant *ConstantExpr::getSizeOf(Type* Ty) { // sizeof is implemented as: (i64) gep (Ty*)null, 1 // Note that a non-inbounds gep is used, as null isn't within any object. Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1); + Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } -Constant *ConstantExpr::getAlignOf(const Type* Ty) { +Constant *ConstantExpr::getAlignOf(Type* Ty) { // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1 // Note that a non-inbounds gep is used, as null isn't within any object. - const Type *AligningTy = + Type *AligningTy = StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL); Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo()); Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; - Constant *GEP = getGetElementPtr(NullPtr, Indices, 2); + Constant *GEP = getGetElementPtr(NullPtr, Indices); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } -Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) { +Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) { return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()), FieldNo)); } -Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { +Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) { // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo // Note that a non-inbounds gep is used, as null isn't within any object. Constant *GEPIdx[] = { @@ -1550,7 +1553,7 @@ Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { FieldNo }; Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2); + Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -1592,14 +1595,13 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) { return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); } -Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs, - unsigned NumIdx, bool InBounds) { - if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx)) +Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs, + bool InBounds) { + if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs)) return FC; // Fold a few common cases. // Get the result type of the getelementptr! 
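The getSizeOf/getAlignOf/getOffsetOf hunks above spell out the trick in their comments: sizeof becomes a non-inbounds gep of element 1 off a null Ty*, followed by a ptrtoint to i64, so no TargetData is consulted. A sketch that reproduces the sizeof case by hand with the ArrayRef-based API from this diff; sizeOfByHand is an invented name and the real entry point remains ConstantExpr::getSizeOf(Ty):

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

// (i64) gep (Ty*)null, 1 -- the byte offset of element 1 from a null
// pointer equals the allocation size of Ty.
static Constant *sizeOfByHand(Type *Ty) {
  Constant *NullPtr = Constant::getNullValue(PointerType::getUnqual(Ty));
  Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
  Constant *GEP = ConstantExpr::getGetElementPtr(NullPtr, One); // not inbounds
  return ConstantExpr::getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext()));
}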
- const Type *Ty = - GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx); + Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs); assert(Ty && "GEP indices invalid!"); unsigned AS = cast<PointerType>(C->getType())->getAddressSpace(); Type *ReqTy = Ty->getPointerTo(AS); @@ -1608,9 +1610,9 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs, "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec; - ArgVec.reserve(NumIdx+1); + ArgVec.reserve(1 + Idxs.size()); ArgVec.push_back(C); - for (unsigned i = 0; i != NumIdx; ++i) + for (unsigned i = 0, e = Idxs.size(); i != e; ++i) ArgVec.push_back(cast<Constant>(Idxs[i])); const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0, InBounds ? GEPOperator::IsInBounds : 0); @@ -1635,8 +1637,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) { // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred); - const Type *ResultTy = Type::getInt1Ty(LHS->getContext()); - if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType())) + Type *ResultTy = Type::getInt1Ty(LHS->getContext()); + if (VectorType *VT = dyn_cast<VectorType>(LHS->getType())) ResultTy = VectorType::get(ResultTy, VT->getNumElements()); LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl; @@ -1658,8 +1660,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred); - const Type *ResultTy = Type::getInt1Ty(LHS->getContext()); - if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType())) + Type *ResultTy = Type::getInt1Ty(LHS->getContext()); + if (VectorType *VT = dyn_cast<VectorType>(LHS->getType())) ResultTy = VectorType::get(ResultTy, VT->getNumElements()); LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl; @@ -1715,8 +1717,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, return FC; // Fold a few common cases. 
unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements(); - const Type *EltTy = cast<VectorType>(V1->getType())->getElementType(); - const Type *ShufTy = VectorType::get(EltTy, NElts); + Type *EltTy = cast<VectorType>(V1->getType())->getElementType(); + Type *ShufTy = VectorType::get(EltTy, NElts); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, V1); @@ -1745,7 +1747,7 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, assert(Agg->getType()->isFirstClassType() && "Tried to create extractelement operation on non-first-class type!"); - const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs); + Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs); (void)ReqTy; assert(ReqTy && "extractvalue indices invalid!"); @@ -1878,7 +1880,7 @@ const char *ConstantExpr::getOpcodeName() const { GetElementPtrConstantExpr:: GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList, - const Type *DestTy) + Type *DestTy) : ConstantExpr(DestTy, Instruction::GetElementPtr, OperandTraits<GetElementPtrConstantExpr>::op_end(this) - (IdxList.size()+1), IdxList.size()+1) { @@ -2091,8 +2093,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, if (Val == From) Val = To; Indices.push_back(Val); } - Replacement = ConstantExpr::getGetElementPtr(Pointer, - &Indices[0], Indices.size(), + Replacement = ConstantExpr::getGetElementPtr(Pointer, Indices, cast<GEPOperator>(this)->isInBounds()); } else if (getOpcode() == Instruction::ExtractValue) { Constant *Agg = getOperand(0); diff --git a/contrib/llvm/lib/VMCore/ConstantsContext.h b/contrib/llvm/lib/VMCore/ConstantsContext.h index bd134d9..1077004 100644 --- a/contrib/llvm/lib/VMCore/ConstantsContext.h +++ b/contrib/llvm/lib/VMCore/ConstantsContext.h @@ -36,7 +36,7 @@ public: void *operator new(size_t s) { return User::operator new(s, 1); } - UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty) + UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty) : ConstantExpr(Ty, Opcode, &Op<0>(), 1) { Op<0>() = C; } @@ -159,7 +159,7 @@ public: } ExtractValueConstantExpr(Constant *Agg, const SmallVector<unsigned, 4> &IdxList, - const Type *DestTy) + Type *DestTy) : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1), Indices(IdxList) { Op<0>() = Agg; @@ -184,7 +184,7 @@ public: } InsertValueConstantExpr(Constant *Agg, Constant *Val, const SmallVector<unsigned, 4> &IdxList, - const Type *DestTy) + Type *DestTy) : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2), Indices(IdxList) { Op<0>() = Agg; @@ -203,11 +203,11 @@ public: /// used behind the scenes to implement getelementpr constant exprs. 
class GetElementPtrConstantExpr : public ConstantExpr { GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList, - const Type *DestTy); + Type *DestTy); public: static GetElementPtrConstantExpr *Create(Constant *C, const std::vector<Constant*>&IdxList, - const Type *DestTy, + Type *DestTy, unsigned Flags) { GetElementPtrConstantExpr *Result = new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy); @@ -228,7 +228,7 @@ struct CompareConstantExpr : public ConstantExpr { return User::operator new(s, 2); } unsigned short predicate; - CompareConstantExpr(const Type *ty, Instruction::OtherOps opc, + CompareConstantExpr(Type *ty, Instruction::OtherOps opc, unsigned short pred, Constant* LHS, Constant* RHS) : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) { Op<0>() = LHS; @@ -392,7 +392,7 @@ struct ConstantTraits<Constant *> { template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator { - static ConstantClass *create(const TypeClass *Ty, const ValType &V) { + static ConstantClass *create(TypeClass *Ty, const ValType &V) { return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V); } }; @@ -407,7 +407,7 @@ struct ConstantKeyData { template<> struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> { - static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V, + static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V, unsigned short pred = 0) { if (Instruction::isCast(V.opcode)) return new UnaryConstantExpr(V.opcode, V.operands[0], Ty); @@ -470,7 +470,7 @@ struct ConstantKeyData<ConstantExpr> { // ConstantAggregateZero does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantAggregateZero, Type, ValType> { - static ConstantAggregateZero *create(const Type *Ty, const ValType &V){ + static ConstantAggregateZero *create(Type *Ty, const ValType &V){ return new ConstantAggregateZero(Ty); } }; @@ -522,7 +522,7 @@ struct ConstantKeyData<ConstantStruct> { // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { - static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){ + static ConstantPointerNull *create(PointerType *Ty, const ValType &V){ return new ConstantPointerNull(Ty); } }; @@ -538,7 +538,7 @@ struct ConstantKeyData<ConstantPointerNull> { // UndefValue does not take extra "value" argument... 
template<class ValType> struct ConstantCreator<UndefValue, Type, ValType> { - static UndefValue *create(const Type *Ty, const ValType &V) { + static UndefValue *create(Type *Ty, const ValType &V) { return new UndefValue(Ty); } }; @@ -553,7 +553,7 @@ struct ConstantKeyData<UndefValue> { template<> struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> { - static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) { + static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) { return new InlineAsm(Ty, Key.asm_string, Key.constraints, Key.has_side_effects, Key.is_align_stack); } @@ -572,7 +572,7 @@ template<class ValType, class ValRefType, class TypeClass, class ConstantClass, bool HasLargeKey = false /*true for arrays and structs*/ > class ConstantUniqueMap { public: - typedef std::pair<const TypeClass*, ValType> MapKey; + typedef std::pair<TypeClass*, ValType> MapKey; typedef std::map<MapKey, ConstantClass *> MapTy; typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy; private: @@ -623,7 +623,7 @@ private: } typename MapTy::iterator I = - Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()), + Map.find(MapKey(static_cast<TypeClass*>(CP->getType()), ConstantKeyData<ConstantClass>::getValType(CP))); if (I == Map.end() || I->second != CP) { // FIXME: This should not use a linear scan. If this gets to be a @@ -634,7 +634,7 @@ private: return I; } - ConstantClass *Create(const TypeClass *Ty, ValRefType V, + ConstantClass *Create(TypeClass *Ty, ValRefType V, typename MapTy::iterator I) { ConstantClass* Result = ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V); @@ -651,7 +651,7 @@ public: /// getOrCreate - Return the specified constant from the map, creating it if /// necessary. - ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) { + ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) { MapKey Lookup(Ty, V); ConstantClass* Result = 0; diff --git a/contrib/llvm/lib/VMCore/Core.cpp b/contrib/llvm/lib/VMCore/Core.cpp index 2a816e1..a505e4b 100644 --- a/contrib/llvm/lib/VMCore/Core.cpp +++ b/contrib/llvm/lib/VMCore/Core.cpp @@ -167,6 +167,11 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { } } +LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty) +{ + return unwrap(Ty)->isSized(); +} + LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) { return wrap(&unwrap(Ty)->getContext()); } @@ -299,7 +304,15 @@ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) { - return wrap(StructType::createNamed(*unwrap(C), Name)); + return wrap(StructType::create(*unwrap(C), Name)); +} + +const char *LLVMGetStructName(LLVMTypeRef Ty) +{ + StructType *Type = unwrap<StructType>(Ty); + if (!Type->hasName()) + return 0; + return Type->getName().data(); } void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes, @@ -448,7 +461,10 @@ LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) { /*--.. 
Operations on Users .................................................--*/ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) { - return wrap(unwrap<User>(Val)->getOperand(Index)); + Value *V = unwrap(Val); + if (MDNode *MD = dyn_cast<MDNode>(V)) + return wrap(MD->getOperand(Index)); + return wrap(cast<User>(V)->getOperand(Index)); } void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) { @@ -456,7 +472,10 @@ void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) { } int LLVMGetNumOperands(LLVMValueRef Val) { - return unwrap<User>(Val)->getNumOperands(); + Value *V = unwrap(Val); + if (MDNode *MD = dyn_cast<MDNode>(V)) + return MD->getNumOperands(); + return cast<User>(V)->getNumOperands(); } /*--.. Operations on constants of any type .................................--*/ @@ -506,13 +525,39 @@ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, unsigned Count) { return wrap(MDNode::get(*unwrap(C), - ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count))); + makeArrayRef(unwrap<Value>(Vals, Count), Count))); } LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); } +const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) { + if (const MDString *S = dyn_cast<MDString>(unwrap(V))) { + *Len = S->getString().size(); + return S->getString().data(); + } + *Len = 0; + return 0; +} + +unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name) +{ + if (NamedMDNode *N = unwrap(M)->getNamedMetadata(name)) { + return N->getNumOperands(); + } + return 0; +} + +void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest) +{ + NamedMDNode *N = unwrap(M)->getNamedMetadata(name); + if (!N) + return; + for (unsigned i=0;i<N->getNumOperands();i++) + Dest[i] = wrap(N->getOperand(i)); +} + /*--.. 
Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, @@ -525,7 +570,8 @@ LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy, const uint64_t Words[]) { IntegerType *Ty = unwrap<IntegerType>(IntTy); return wrap(ConstantInt::get(Ty->getContext(), - APInt(Ty->getBitWidth(), NumWords, Words))); + APInt(Ty->getBitWidth(), + makeArrayRef(Words, NumWords)))); } LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[], @@ -575,8 +621,7 @@ LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { Constant **Elements = unwrap<Constant>(ConstantVals, Count); - return wrap(ConstantStruct::getAnon(*unwrap(C), - ArrayRef<Constant*>(Elements, Count), + return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count), Packed != 0)); } @@ -600,19 +645,44 @@ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, LLVMValueRef *ConstantVals, unsigned Count) { Constant **Elements = unwrap<Constant>(ConstantVals, Count); - const StructType *Ty = cast<StructType>(unwrap(StructTy)); + StructType *Ty = cast<StructType>(unwrap(StructTy)); - return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count))); + return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count))); } LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { - return wrap(ConstantVector::get(ArrayRef<Constant*>( + return wrap(ConstantVector::get(makeArrayRef( unwrap<Constant>(ScalarConstantVals, Size), Size))); } + +/*-- Opcode mapping */ + +static LLVMOpcode map_to_llvmopcode(int opcode) +{ + switch (opcode) { + default: + assert(0 && "Unhandled Opcode."); +#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc; +#include "llvm/Instruction.def" +#undef HANDLE_INST + } +} + +static int map_from_llvmopcode(LLVMOpcode code) +{ + switch (code) { + default: + assert(0 && "Unhandled Opcode."); +#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num; +#include "llvm/Instruction.def" +#undef HANDLE_INST + } +} + /*--.. 
Constant expressions ................................................--*/ LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) { - return (LLVMOpcode)unwrap<ConstantExpr>(ConstantVal)->getOpcode(); + return map_to_llvmopcode(unwrap<ConstantExpr>(ConstantVal)->getOpcode()); } LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) { @@ -792,18 +862,19 @@ LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { + ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), + NumIndices); return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal), - unwrap<Constant>(ConstantIndices, - NumIndices), - NumIndices)); + IdxList)); } LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { Constant* Val = unwrap<Constant>(ConstantVal); - Constant** Idxs = unwrap<Constant>(ConstantIndices, NumIndices); - return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, Idxs, NumIndices)); + ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), + NumIndices); + return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList)); } LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { @@ -934,8 +1005,7 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx) { return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant), - ArrayRef<unsigned>(IdxList, - NumIdx))); + makeArrayRef(IdxList, NumIdx))); } LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, @@ -943,8 +1013,7 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx) { return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant), unwrap<Constant>(ElementValueConstant), - ArrayRef<unsigned>(IdxList, - NumIdx))); + makeArrayRef(IdxList, NumIdx))); } LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, @@ -1383,6 +1452,10 @@ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) { return wrap(unwrap(BB)->getParent()); } +LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) { + return wrap(unwrap(BB)->getTerminator()); +} + unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) { return unwrap<Function>(FnRef)->size(); } @@ -1455,6 +1528,10 @@ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) { unwrap(BBRef)->eraseFromParent(); } +void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) { + unwrap(BBRef)->removeFromParent(); +} + void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) { unwrap(BB)->moveBefore(unwrap(MovePos)); } @@ -1501,6 +1578,25 @@ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) { return wrap(--I); } +void LLVMInstructionEraseFromParent(LLVMValueRef Inst) { + unwrap<Instruction>(Inst)->eraseFromParent(); +} + +LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) { + if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst))) + return (LLVMIntPredicate)I->getPredicate(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst))) + if (CE->getOpcode() == Instruction::ICmp) + return (LLVMIntPredicate)CE->getPredicate(); + return (LLVMIntPredicate)0; +} + +LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) { + if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst))) + return map_to_llvmopcode(C->getOpcode()); + return (LLVMOpcode)0; +} + /*--.. 
Call and invoke instructions ........................................--*/ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) { @@ -1554,6 +1650,12 @@ void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) { unwrap<CallInst>(Call)->setTailCall(isTailCall); } +/*--.. Operations on switch instructions (only) ............................--*/ + +LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) { + return wrap(unwrap<SwitchInst>(Switch)->getDefaultDest()); +} + /*--.. Operations on phi nodes .............................................--*/ void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, @@ -1680,12 +1782,20 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name) { return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch), - ArrayRef<Value *>(unwrap(Args), NumArgs), + makeArrayRef(unwrap(Args), NumArgs), Name)); } -LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef B) { - return wrap(unwrap(B)->CreateUnwind()); +LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name) { + return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), + cast<Function>(unwrap(PersFn)), + NumClauses, Name)); +} + +LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) { + return wrap(unwrap(B)->CreateResume(unwrap(Exn))); } LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) { @@ -1701,6 +1811,15 @@ void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) { unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest)); } +void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) { + unwrap<LandingPadInst>(LandingPad)-> + addClause(cast<Constant>(unwrap(ClauseVal))); +} + +void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) { + unwrap<LandingPadInst>(LandingPad)->setCleanup(Val); +} + /*--.. 
Arithmetic ..........................................................--*/ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, @@ -1831,7 +1950,7 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { - return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS), + return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(map_from_llvmopcode(Op)), unwrap(LHS), unwrap(RHS), Name)); } @@ -1861,7 +1980,7 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), @@ -1872,7 +1991,7 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name) { - const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), @@ -1910,15 +2029,15 @@ LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val, LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { - return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), unwrap(Indices), - unwrap(Indices) + NumIndices, Name)); + ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices); + return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { - return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), unwrap(Indices), - unwrap(Indices) + NumIndices, Name)); + ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices); + return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, @@ -2018,7 +2137,7 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { - return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val), + return wrap(unwrap(B)->CreateCast(Instruction::CastOps(map_from_llvmopcode(Op)), unwrap(Val), unwrap(DestTy), Name)); } @@ -2064,7 +2183,7 @@ LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { return wrap(unwrap(B)->CreateCall(unwrap(Fn), - ArrayRef<Value *>(unwrap(Args), NumArgs), + makeArrayRef(unwrap(Args), NumArgs), Name)); } diff --git a/contrib/llvm/lib/VMCore/DebugLoc.cpp b/contrib/llvm/lib/VMCore/DebugLoc.cpp index 4ff6b2c..328244f 100644 --- a/contrib/llvm/lib/VMCore/DebugLoc.cpp +++ b/contrib/llvm/lib/VMCore/DebugLoc.cpp @@ -104,7 +104,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { 
assert(Scope && "If scope is null, this should be isUnknown()"); LLVMContext &Ctx2 = Scope->getContext(); - const Type *Int32 = Type::getInt32Ty(Ctx2); + Type *Int32 = Type::getInt32Ty(Ctx2); Value *Elts[] = { ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()), Scope, IA @@ -240,7 +240,7 @@ int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA, /// deleted - The MDNode this is pointing to got deleted, so this pointer needs /// to drop to null and we need remove our entry from the DenseMap. void DebugRecVH::deleted() { - // If this is a non-canonical reference, just drop the value to null, we know + // If this is a non-canonical reference, just drop the value to null, we know // it doesn't have a map entry. if (Idx == 0) { setValPtr(0); diff --git a/contrib/llvm/lib/VMCore/Function.cpp b/contrib/llvm/lib/VMCore/Function.cpp index 6536bcd..1215e6a 100644 --- a/contrib/llvm/lib/VMCore/Function.cpp +++ b/contrib/llvm/lib/VMCore/Function.cpp @@ -17,6 +17,7 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/StringPool.h" @@ -38,7 +39,7 @@ template class llvm::SymbolTableListTraits<BasicBlock, Function>; // Argument Implementation //===----------------------------------------------------------------------===// -Argument::Argument(const Type *Ty, const Twine &Name, Function *Par) +Argument::Argument(Type *Ty, const Twine &Name, Function *Par) : Value(Ty, Value::ArgumentVal) { Parent = 0; @@ -158,7 +159,7 @@ void Function::eraseFromParent() { // Function Implementation //===----------------------------------------------------------------------===// -Function::Function(const FunctionType *Ty, LinkageTypes Linkage, +Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) : GlobalValue(PointerType::getUnqual(Ty), Value::FunctionVal, 0, 0, Linkage, name) { @@ -195,7 +196,7 @@ Function::~Function() { void Function::BuildLazyArguments() const { // Create the arguments vector, all arguments start out unnamed. - const FunctionType *FT = getFunctionType(); + FunctionType *FT = getFunctionType(); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { assert(!FT->getParamType(i)->isVoidTy() && "Cannot have void typed arguments!"); @@ -345,7 +346,7 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) { return Table[id]; std::string Result(Table[id]); for (unsigned i = 0; i < Tys.size(); ++i) { - if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) { + if (PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) { Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) + EVT::getEVT(PTyp->getElementType()).getEVTString(); } @@ -355,9 +356,9 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) { return Result; } -const FunctionType *Intrinsic::getType(LLVMContext &Context, +FunctionType *Intrinsic::getType(LLVMContext &Context, ID id, ArrayRef<Type*> Tys) { - const Type *ResultTy = NULL; + Type *ResultTy = NULL; std::vector<Type*> ArgTys; bool IsVarArg = false; @@ -416,8 +417,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const { /// FIXME: Remove after <rdar://problem/8031714> is fixed. /// FIXME: Is the above FIXME valid? 
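The Function.cpp hunk that follows rewrites callsFunctionThatReturnsTwice to walk the function's own instructions via the newly included InstIterator header, checking canReturnTwice() and a name list per call site instead of scanning the users of a fixed set of module-level functions. A hedged sketch of that inst_iterator idiom; countCalls is an invented helper:

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;

// Visit every instruction in F without nesting basic-block and
// instruction loops by hand.
static unsigned countCalls(const Function *F) {
  unsigned N = 0;
  for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (isa<CallInst>(&*I))
      ++N;
  return N;
}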
bool Function::callsFunctionThatReturnsTwice() const { - const Module *M = this->getParent(); - static const char *ReturnsTwiceFns[] = { + static const char *const ReturnsTwiceFns[] = { "_setjmp", "setjmp", "sigsetjmp", @@ -428,16 +428,25 @@ bool Function::callsFunctionThatReturnsTwice() const { "getcontext" }; - for (unsigned I = 0; I < array_lengthof(ReturnsTwiceFns); ++I) - if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { - if (!Callee->use_empty()) - for (Value::const_use_iterator - I = Callee->use_begin(), E = Callee->use_end(); - I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(*I)) - if (CI->getParent()->getParent() == this) - return true; + for (const_inst_iterator I = inst_begin(this), E = inst_end(this); I != E; + ++I) { + const CallInst* callInst = dyn_cast<CallInst>(&*I); + if (!callInst) + continue; + if (callInst->canReturnTwice()) + return true; + + // check for known function names. + // FIXME: move this to clang. + Function *F = callInst->getCalledFunction(); + if (!F) + continue; + StringRef Name = F->getName(); + for (unsigned J = 0, e = array_lengthof(ReturnsTwiceFns); J != e; ++J) { + if (Name == ReturnsTwiceFns[J]) + return true; } + } return false; } diff --git a/contrib/llvm/lib/VMCore/GCOV.cpp b/contrib/llvm/lib/VMCore/GCOV.cpp new file mode 100644 index 0000000..fc7f96f --- /dev/null +++ b/contrib/llvm/lib/VMCore/GCOV.cpp @@ -0,0 +1,281 @@ +//===- GCOVr.cpp - LLVM coverage tool -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// GCOV implements the interface to read and write coverage files that use +// 'gcov' format. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GCOV.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/system_error.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// GCOVFile implementation. + +/// ~GCOVFile - Delete GCOVFile and its content. +GCOVFile::~GCOVFile() { + DeleteContainerPointers(Functions); +} + +/// isGCDAFile - Return true if Format identifies a .gcda file. +static bool isGCDAFile(GCOVFormat Format) { + return Format == GCDA_402 || Format == GCDA_404; +} + +/// isGCNOFile - Return true if Format identifies a .gcno file. +static bool isGCNOFile(GCOVFormat Format) { + return Format == GCNO_402 || Format == GCNO_404; +} + +/// read - Read GCOV buffer. +bool GCOVFile::read(GCOVBuffer &Buffer) { + GCOVFormat Format = Buffer.readGCOVFormat(); + if (Format == InvalidGCOV) + return false; + + unsigned i = 0; + while (1) { + GCOVFunction *GFun = NULL; + if (isGCDAFile(Format)) { + // Use existing function while reading .gcda file. + assert (i < Functions.size() && ".gcda data does not match .gcno data"); + GFun = Functions[i]; + } else if (isGCNOFile(Format)){ + GFun = new GCOVFunction(); + Functions.push_back(GFun); + } + if (!GFun || !GFun->read(Buffer, Format)) + break; + ++i; + } + return true; +} + +/// dump - Dump GCOVFile content on standard out for debugging purposes. 
+void GCOVFile::dump() { + for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(), + E = Functions.end(); I != E; ++I) + (*I)->dump(); +} + +/// collectLineCounts - Collect line counts. This must be used after +/// reading .gcno and .gcda files. +void GCOVFile::collectLineCounts(FileInfo &FI) { + for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(), + E = Functions.end(); I != E; ++I) + (*I)->collectLineCounts(FI); + FI.print(); +} + +//===----------------------------------------------------------------------===// +// GCOVFunction implementation. + +/// ~GCOVFunction - Delete GCOVFunction and its content. +GCOVFunction::~GCOVFunction() { + DeleteContainerPointers(Blocks); +} + +/// read - Read a aunction from the buffer. Return false if buffer cursor +/// does not point to a function tag. +bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) { + if (!Buff.readFunctionTag()) + return false; + + Buff.readInt(); // Function header length + Ident = Buff.readInt(); + Buff.readInt(); // Checksum #1 + if (Format != GCNO_402) + Buff.readInt(); // Checksum #2 + + Name = Buff.readString(); + if (Format == GCNO_402 || Format == GCNO_404) + Filename = Buff.readString(); + + if (Format == GCDA_402 || Format == GCDA_404) { + Buff.readArcTag(); + uint32_t Count = Buff.readInt() / 2; + for (unsigned i = 0, e = Count; i != e; ++i) { + Blocks[i]->addCount(Buff.readInt64()); + } + return true;; + } + + LineNumber = Buff.readInt(); + + // read blocks. + assert (Buff.readBlockTag() && "Block Tag not found!"); + uint32_t BlockCount = Buff.readInt(); + for (int i = 0, e = BlockCount; i != e; ++i) { + Buff.readInt(); // Block flags; + Blocks.push_back(new GCOVBlock(i)); + } + + // read edges. + while (Buff.readEdgeTag()) { + uint32_t EdgeCount = (Buff.readInt() - 1) / 2; + uint32_t BlockNo = Buff.readInt(); + assert (BlockNo < BlockCount && "Unexpected Block number!"); + for (int i = 0, e = EdgeCount; i != e; ++i) { + Blocks[BlockNo]->addEdge(Buff.readInt()); + Buff.readInt(); // Edge flag + } + } + + // read line table. + while (Buff.readLineTag()) { + uint32_t LineTableLength = Buff.readInt(); + uint32_t Size = Buff.getCursor() + LineTableLength*4; + uint32_t BlockNo = Buff.readInt(); + assert (BlockNo < BlockCount && "Unexpected Block number!"); + GCOVBlock *Block = Blocks[BlockNo]; + Buff.readInt(); // flag + while (Buff.getCursor() != (Size - 4)) { + StringRef Filename = Buff.readString(); + if (Buff.getCursor() == (Size - 4)) break; + while (uint32_t L = Buff.readInt()) + Block->addLine(Filename, L); + } + Buff.readInt(); // flag + } + return true; +} + +/// dump - Dump GCOVFunction content on standard out for debugging purposes. +void GCOVFunction::dump() { + outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n"; + for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) + (*I)->dump(); +} + +/// collectLineCounts - Collect line counts. This must be used after +/// reading .gcno and .gcda files. +void GCOVFunction::collectLineCounts(FileInfo &FI) { + for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) + (*I)->collectLineCounts(FI); +} + +//===----------------------------------------------------------------------===// +// GCOVBlock implementation. + +/// ~GCOVBlock - Delete GCOVBlock and its content. 
+GCOVBlock::~GCOVBlock() { + Edges.clear(); + DeleteContainerSeconds(Lines); +} + +void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) { + GCOVLines *&LinesForFile = Lines[Filename]; + if (!LinesForFile) + LinesForFile = new GCOVLines(); + LinesForFile->add(LineNo); +} + +/// collectLineCounts - Collect line counts. This must be used after +/// reading .gcno and .gcda files. +void GCOVBlock::collectLineCounts(FileInfo &FI) { + for (StringMap<GCOVLines *>::iterator I = Lines.begin(), + E = Lines.end(); I != E; ++I) + I->second->collectLineCounts(FI, I->first(), Counter); +} + +/// dump - Dump GCOVBlock content on standard out for debugging purposes. +void GCOVBlock::dump() { + outs() << "Block : " << Number << " Counter : " << Counter << "\n"; + if (!Edges.empty()) { + outs() << "\tEdges : "; + for (SmallVector<uint32_t, 16>::iterator I = Edges.begin(), E = Edges.end(); + I != E; ++I) + outs() << (*I) << ","; + outs() << "\n"; + } + if (!Lines.empty()) { + outs() << "\tLines : "; + for (StringMap<GCOVLines *>::iterator LI = Lines.begin(), + LE = Lines.end(); LI != LE; ++LI) { + outs() << LI->first() << " -> "; + LI->second->dump(); + outs() << "\n"; + } + } +} + +//===----------------------------------------------------------------------===// +// GCOVLines implementation. + +/// collectLineCounts - Collect line counts. This must be used after +/// reading .gcno and .gcda files. +void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename, + uint32_t Count) { + for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(), + E = Lines.end(); I != E; ++I) + FI.addLineCount(Filename, *I, Count); +} + +/// dump - Dump GCOVLines content on standard out for debugging purposes. +void GCOVLines::dump() { + for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(), + E = Lines.end(); I != E; ++I) + outs() << (*I) << ","; +} + +//===----------------------------------------------------------------------===// +// FileInfo implementation. + +/// addLineCount - Add line count for the given line number in a file. +void FileInfo::addLineCount(StringRef Filename, uint32_t Line, uint32_t Count) { + if (LineInfo.find(Filename) == LineInfo.end()) { + OwningPtr<MemoryBuffer> Buff; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { + errs() << Filename << ": " << ec.message() << "\n"; + return; + } + StringRef AllLines = Buff.take()->getBuffer(); + LineCounts L(AllLines.count('\n')+2); + L[Line-1] = Count; + LineInfo[Filename] = L; + return; + } + LineCounts &L = LineInfo[Filename]; + L[Line-1] = Count; +} + +/// print - Print source files with collected line count information. 
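// --- Illustrative sketch (not from the change above) -----------------------
// A hedged example of how the new GCOV reader classes compose: read a .gcno
// and a matching .gcda through GCOVBuffer/GCOVFile, then let
// collectLineCounts() feed FileInfo, whose print() is defined just below.
// The GCOVBuffer(MemoryBuffer*) constructor and the file-name parameters are
// assumptions about the accompanying Support/GCOV.h header; error handling
// is deliberately minimal.
#include "llvm/Support/GCOV.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/ADT/OwningPtr.h"
using namespace llvm;

static bool dumpCoverage(const char *GCNOPath, const char *GCDAPath) {
  GCOVFile GF;
  OwningPtr<MemoryBuffer> GCNOBuff, GCDABuff;
  if (error_code ec = MemoryBuffer::getFileOrSTDIN(GCNOPath, GCNOBuff)) {
    errs() << GCNOPath << ": " << ec.message() << "\n";
    return false;
  }
  if (error_code ec = MemoryBuffer::getFileOrSTDIN(GCDAPath, GCDABuff)) {
    errs() << GCDAPath << ": " << ec.message() << "\n";
    return false;
  }
  GCOVBuffer GCNO(GCNOBuff.take());   // .gcno first: builds functions/blocks
  if (!GF.read(GCNO))
    return false;
  GCOVBuffer GCDA(GCDABuff.take());   // .gcda second: fills in the counters
  if (!GF.read(GCDA))
    return false;
  FileInfo FI;
  GF.collectLineCounts(FI);           // per the code above, this ends in FI.print()
  return true;
}
// ---------------------------------------------------------------------------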
+void FileInfo::print() { + for (StringMap<LineCounts>::iterator I = LineInfo.begin(), E = LineInfo.end(); + I != E; ++I) { + StringRef Filename = I->first(); + outs() << Filename << "\n"; + LineCounts &L = LineInfo[Filename]; + OwningPtr<MemoryBuffer> Buff; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { + errs() << Filename << ": " << ec.message() << "\n"; + return; + } + StringRef AllLines = Buff.take()->getBuffer(); + for (unsigned i = 0, e = L.size(); i != e; ++i) { + if (L[i]) + outs() << L[i] << ":\t"; + else + outs() << " :\t"; + std::pair<StringRef, StringRef> P = AllLines.split('\n'); + if (AllLines != P.first) + outs() << P.first; + outs() << "\n"; + AllLines = P.second; + } + } +} + + diff --git a/contrib/llvm/lib/VMCore/Globals.cpp b/contrib/llvm/lib/VMCore/Globals.cpp index db008e0..4254fb2 100644 --- a/contrib/llvm/lib/VMCore/Globals.cpp +++ b/contrib/llvm/lib/VMCore/Globals.cpp @@ -80,7 +80,7 @@ bool GlobalValue::isDeclaration() const { // GlobalVariable Implementation //===----------------------------------------------------------------------===// -GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link, +GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, bool ThreadLocal, unsigned AddressSpace) : GlobalValue(PointerType::get(Ty, AddressSpace), @@ -97,7 +97,7 @@ GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link, LeakDetector::addGarbageObject(this); } -GlobalVariable::GlobalVariable(Module &M, const Type *Ty, bool constant, +GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, GlobalVariable *Before, bool ThreadLocal, @@ -186,7 +186,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { // GlobalAlias Implementation //===----------------------------------------------------------------------===// -GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link, +GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link, const Twine &Name, Constant* aliasee, Module *ParentModule) : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) { @@ -235,7 +235,7 @@ const GlobalValue *GlobalAlias::getAliasedGlobal() const { CE->getOpcode() == Instruction::GetElementPtr) && "Unsupported aliasee"); - return dyn_cast<GlobalValue>(CE->getOperand(0)); + return cast<GlobalValue>(CE->getOperand(0)); } const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const { diff --git a/contrib/llvm/lib/VMCore/IRBuilder.cpp b/contrib/llvm/lib/VMCore/IRBuilder.cpp index ffe961f..5114e2d 100644 --- a/contrib/llvm/lib/VMCore/IRBuilder.cpp +++ b/contrib/llvm/lib/VMCore/IRBuilder.cpp @@ -40,7 +40,7 @@ Type *IRBuilderBase::getCurrentFunctionReturnType() const { } Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) { - const PointerType *PT = cast<PointerType>(Ptr->getType()); + PointerType *PT = cast<PointerType>(Ptr->getType()); if (PT->getElementType()->isIntegerTy(8)) return Ptr; diff --git a/contrib/llvm/lib/VMCore/InlineAsm.cpp b/contrib/llvm/lib/VMCore/InlineAsm.cpp index 4a03b39..736e370 100644 --- a/contrib/llvm/lib/VMCore/InlineAsm.cpp +++ b/contrib/llvm/lib/VMCore/InlineAsm.cpp @@ -25,7 +25,7 @@ InlineAsm::~InlineAsm() { } -InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString, +InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack) { InlineAsmKeyType Key(AsmString, 
Constraints, hasSideEffects, isAlignStack); @@ -33,7 +33,7 @@ InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString, return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key); } -InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString, +InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString, const std::string &constraints, bool hasSideEffects, bool isAlignStack) : Value(Ty, Value::InlineAsmVal), @@ -242,7 +242,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) { /// Verify - Verify that the specified constraint string is reasonable for the /// specified function type, and otherwise validate the constraint string. -bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { +bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->isVarArg()) return false; ConstraintInfoVector Constraints = ParseConstraints(ConstStr); @@ -282,7 +282,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { if (Ty->getReturnType()->isStructTy()) return false; break; default: - const StructType *STy = dyn_cast<StructType>(Ty->getReturnType()); + StructType *STy = dyn_cast<StructType>(Ty->getReturnType()); if (STy == 0 || STy->getNumElements() != NumOutputs) return false; break; diff --git a/contrib/llvm/lib/VMCore/Instruction.cpp b/contrib/llvm/lib/VMCore/Instruction.cpp index 02c0757..73191c1 100644 --- a/contrib/llvm/lib/VMCore/Instruction.cpp +++ b/contrib/llvm/lib/VMCore/Instruction.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/LeakDetector.h" using namespace llvm; -Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps, +Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, Instruction *InsertBefore) : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) { // Make sure that we get added to a basicblock @@ -34,7 +34,7 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps, } } -Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps, +Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) { // Make sure that we get added to a basicblock @@ -101,6 +101,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Switch: return "switch"; case IndirectBr: return "indirectbr"; case Invoke: return "invoke"; + case Resume: return "resume"; case Unwind: return "unwind"; case Unreachable: return "unreachable"; @@ -127,6 +128,9 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Alloca: return "alloca"; case Load: return "load"; case Store: return "store"; + case AtomicCmpXchg: return "cmpxchg"; + case AtomicRMW: return "atomicrmw"; + case Fence: return "fence"; case GetElementPtr: return "getelementptr"; // Convert instructions... @@ -158,6 +162,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case ShuffleVector: return "shufflevector"; case ExtractValue: return "extractvalue"; case InsertValue: return "insertvalue"; + case LandingPad: return "landingpad"; default: return "<Invalid operator> "; } @@ -191,10 +196,14 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { // Check special state that is a part of some instructions. 
if (const LoadInst *LI = dyn_cast<LoadInst>(this)) return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I)->getAlignment(); + LI->getAlignment() == cast<LoadInst>(I)->getAlignment() && + LI->getOrdering() == cast<LoadInst>(I)->getOrdering() && + LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(this)) return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I)->getAlignment(); + SI->getAlignment() == cast<StoreInst>(I)->getAlignment() && + SI->getOrdering() == cast<StoreInst>(I)->getOrdering() && + SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(this)) return CI->getPredicate() == cast<CmpInst>(I)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(this)) @@ -208,6 +217,18 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices(); if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices(); + if (const FenceInst *FI = dyn_cast<FenceInst>(this)) + return FI->getOrdering() == cast<FenceInst>(FI)->getOrdering() && + FI->getSynchScope() == cast<FenceInst>(FI)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this)) + return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() && + CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() && + CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this)) + return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() && + RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() && + RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() && + RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope(); return true; } @@ -230,10 +251,14 @@ bool Instruction::isSameOperationAs(const Instruction *I) const { // Check special state that is a part of some instructions. 
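// --- Illustrative sketch (not from the change above) -----------------------
// A small, hedged illustration of why ordering/scope were folded into the
// special-state checks above: a CSE-style client asking whether two loads of
// the same pointer are interchangeable now gets 'false' unless their atomic
// ordering and synchronization scope also agree, so an acquire load is never
// merged with a plain unordered one.  The helper name is illustrative only.
#include "llvm/Instructions.h"
using namespace llvm;

static bool safeToMergeLoads(const LoadInst *A, const LoadInst *B) {
  // Volatility, alignment, ordering and synch scope are all compared here.
  return A->isIdenticalToWhenDefined(B);
}
// ---------------------------------------------------------------------------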
if (const LoadInst *LI = dyn_cast<LoadInst>(this)) return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I)->getAlignment(); + LI->getAlignment() == cast<LoadInst>(I)->getAlignment() && + LI->getOrdering() == cast<LoadInst>(I)->getOrdering() && + LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(this)) return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I)->getAlignment(); + SI->getAlignment() == cast<StoreInst>(I)->getAlignment() && + SI->getOrdering() == cast<StoreInst>(I)->getOrdering() && + SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(this)) return CI->getPredicate() == cast<CmpInst>(I)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(this)) @@ -248,6 +273,18 @@ bool Instruction::isSameOperationAs(const Instruction *I) const { return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices(); if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices(); + if (const FenceInst *FI = dyn_cast<FenceInst>(this)) + return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() && + FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this)) + return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() && + CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() && + CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this)) + return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() && + RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() && + RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() && + RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope(); return true; } @@ -280,13 +317,16 @@ bool Instruction::mayReadFromMemory() const { default: return false; case Instruction::VAArg: case Instruction::Load: + case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: return true; case Instruction::Call: return !cast<CallInst>(this)->doesNotAccessMemory(); case Instruction::Invoke: return !cast<InvokeInst>(this)->doesNotAccessMemory(); case Instruction::Store: - return cast<StoreInst>(this)->isVolatile(); + return !cast<StoreInst>(this)->isUnordered(); } } @@ -295,15 +335,18 @@ bool Instruction::mayReadFromMemory() const { bool Instruction::mayWriteToMemory() const { switch (getOpcode()) { default: return false; + case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory case Instruction::Store: case Instruction::VAArg: + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: return true; case Instruction::Call: return !cast<CallInst>(this)->onlyReadsMemory(); case Instruction::Invoke: return !cast<InvokeInst>(this)->onlyReadsMemory(); case Instruction::Load: - return cast<LoadInst>(this)->isVolatile(); + return !cast<LoadInst>(this)->isUnordered(); } } @@ -312,7 +355,7 @@ bool Instruction::mayWriteToMemory() const { bool Instruction::mayThrow() const { if (const CallInst *CI = dyn_cast<CallInst>(this)) return !CI->doesNotThrow(); - return false; + return isa<ResumeInst>(this); } /// isAssociative - Return true if the instruction is associative: @@ -372,7 +415,7 @@ bool 
Instruction::isSafeToSpeculativelyExecute() const { } case Load: { const LoadInst *LI = cast<LoadInst>(this); - if (LI->isVolatile()) + if (!LI->isUnordered()) return false; return LI->getPointerOperand()->isDereferenceablePointer(); } @@ -392,6 +435,11 @@ bool Instruction::isSafeToSpeculativelyExecute() const { case Switch: case Unwind: case Unreachable: + case Fence: + case LandingPad: + case AtomicRMW: + case AtomicCmpXchg: + case Resume: return false; // Misc instructions which have effects } } diff --git a/contrib/llvm/lib/VMCore/Instructions.cpp b/contrib/llvm/lib/VMCore/Instructions.cpp index 9baad09..b3a7205 100644 --- a/contrib/llvm/lib/VMCore/Instructions.cpp +++ b/contrib/llvm/lib/VMCore/Instructions.cpp @@ -62,11 +62,11 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { if (Op1->getType() != Op2->getType()) return "both values to select must have same type"; - if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) { + if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) { // Vector select. if (VT->getElementType() != Type::getInt1Ty(Op0->getContext())) return "vector select condition element type must be i1"; - const VectorType *ET = dyn_cast<VectorType>(Op1->getType()); + VectorType *ET = dyn_cast<VectorType>(Op1->getType()); if (ET == 0) return "selected values for vector select must be vectors"; if (ET->getNumElements() != VT->getNumElements()) @@ -166,6 +166,88 @@ Value *PHINode::hasConstantValue() const { return ConstantValue; } +//===----------------------------------------------------------------------===// +// LandingPadInst Implementation +//===----------------------------------------------------------------------===// + +LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn, + unsigned NumReservedValues, const Twine &NameStr, + Instruction *InsertBefore) + : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) { + init(PersonalityFn, 1 + NumReservedValues, NameStr); +} + +LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn, + unsigned NumReservedValues, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) { + init(PersonalityFn, 1 + NumReservedValues, NameStr); +} + +LandingPadInst::LandingPadInst(const LandingPadInst &LP) + : Instruction(LP.getType(), Instruction::LandingPad, + allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()), + ReservedSpace(LP.getNumOperands()) { + Use *OL = OperandList, *InOL = LP.OperandList; + for (unsigned I = 0, E = ReservedSpace; I != E; ++I) + OL[I] = InOL[I]; + + setCleanup(LP.isCleanup()); +} + +LandingPadInst::~LandingPadInst() { + dropHungoffUses(); +} + +LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn, + unsigned NumReservedClauses, + const Twine &NameStr, + Instruction *InsertBefore) { + return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr, + InsertBefore); +} + +LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn, + unsigned NumReservedClauses, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr, + InsertAtEnd); +} + +void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues, + const Twine &NameStr) { + ReservedSpace = NumReservedValues; + NumOperands = 1; + OperandList = allocHungoffUses(ReservedSpace); + OperandList[0] = PersFn; + setName(NameStr); + setCleanup(false); +} + +/// growOperands - grow operands - This grows 
the operand list in response to a +/// push_back style of operation. This grows the number of ops by 2 times. +void LandingPadInst::growOperands(unsigned Size) { + unsigned e = getNumOperands(); + if (ReservedSpace >= e + Size) return; + ReservedSpace = (e + Size / 2) * 2; + + Use *NewOps = allocHungoffUses(ReservedSpace); + Use *OldOps = OperandList; + for (unsigned i = 0; i != e; ++i) + NewOps[i] = OldOps[i]; + + OperandList = NewOps; + Use::zap(OldOps, OldOps + e, true); +} + +void LandingPadInst::addClause(Value *Val) { + unsigned OpNo = getNumOperands(); + growOperands(1); + assert(OpNo < ReservedSpace && "Growing didn't work!"); + ++NumOperands; + OperandList[OpNo] = Val; +} //===----------------------------------------------------------------------===// // CallInst Implementation @@ -179,7 +261,7 @@ void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) { Op<-1>() = Func; #ifndef NDEBUG - const FunctionType *FTy = + FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); assert((Args.size() == FTy->getNumParams() || @@ -201,7 +283,7 @@ void CallInst::init(Value *Func, const Twine &NameStr) { Op<-1>() = Func; #ifndef NDEBUG - const FunctionType *FTy = + FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); assert(FTy->getNumParams() == 0 && "Calling a function with bad signature"); @@ -269,8 +351,8 @@ static bool IsConstantOne(Value *val) { } static Instruction *createMalloc(Instruction *InsertBefore, - BasicBlock *InsertAtEnd, const Type *IntPtrTy, - const Type *AllocTy, Value *AllocSize, + BasicBlock *InsertAtEnd, Type *IntPtrTy, + Type *AllocTy, Value *AllocSize, Value *ArraySize, Function *MallocF, const Twine &Name) { assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && @@ -319,7 +401,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, if (!MallocFunc) // prototype malloc as "void *malloc(size_t)" MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL); - const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy); + PointerType *AllocPtrType = PointerType::getUnqual(AllocTy); CallInst *MCall = NULL; Instruction *Result = NULL; if (InsertBefore) { @@ -354,7 +436,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, /// 2. Call malloc with that argument. /// 3. Bitcast the result of the malloc call to the specified type. Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, - const Type *IntPtrTy, const Type *AllocTy, + Type *IntPtrTy, Type *AllocTy, Value *AllocSize, Value *ArraySize, Function * MallocF, const Twine &Name) { @@ -371,7 +453,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, /// Note: This function does not add the bitcast to the basic block, that is the /// responsibility of the caller. Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, - const Type *IntPtrTy, const Type *AllocTy, + Type *IntPtrTy, Type *AllocTy, Value *AllocSize, Value *ArraySize, Function *MallocF, const Twine &Name) { return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, @@ -388,8 +470,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, BasicBlock* BB = InsertBefore ? 
InsertBefore->getParent() : InsertAtEnd; Module* M = BB->getParent()->getParent(); - const Type *VoidTy = Type::getVoidTy(M->getContext()); - const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext()); + Type *VoidTy = Type::getVoidTy(M->getContext()); + Type *IntPtrTy = Type::getInt8PtrTy(M->getContext()); // prototype free as "void free(void*)" Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL); CallInst* Result = NULL; @@ -436,7 +518,7 @@ void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException, Op<-1>() = IfException; #ifndef NDEBUG - const FunctionType *FTy = + FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()); assert(((Args.size() == FTy->getNumParams()) || @@ -494,6 +576,9 @@ void InvokeInst::removeAttribute(unsigned i, Attributes attr) { setAttributes(PAL); } +LandingPadInst *InvokeInst::getLandingPadInst() const { + return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI()); +} //===----------------------------------------------------------------------===// // ReturnInst Implementation @@ -574,6 +659,41 @@ BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const { } //===----------------------------------------------------------------------===// +// ResumeInst Implementation +//===----------------------------------------------------------------------===// + +ResumeInst::ResumeInst(const ResumeInst &RI) + : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume, + OperandTraits<ResumeInst>::op_begin(this), 1) { + Op<0>() = RI.Op<0>(); +} + +ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore) + : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume, + OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) { + Op<0>() = Exn; +} + +ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd) + : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume, + OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) { + Op<0>() = Exn; +} + +unsigned ResumeInst::getNumSuccessorsV() const { + return getNumSuccessors(); +} + +void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { + llvm_unreachable("ResumeInst has no successors!"); +} + +BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { + llvm_unreachable("ResumeInst has no successors!"); + return 0; +} + +//===----------------------------------------------------------------------===// // UnreachableInst Implementation //===----------------------------------------------------------------------===// @@ -665,6 +785,27 @@ BranchInst::BranchInst(const BranchInst &BI) : SubclassOptionalData = BI.SubclassOptionalData; } +void BranchInst::swapSuccessors() { + assert(isConditional() && + "Cannot swap successors of an unconditional branch"); + Op<-1>().swap(Op<-2>()); + + // Update profile metadata if present and it matches our structural + // expectations. + MDNode *ProfileData = getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) + return; + + // The first operand is the name. Fetch them backwards and build a new one. 
+ Value *Ops[] = { + ProfileData->getOperand(0), + ProfileData->getOperand(2), + ProfileData->getOperand(1) + }; + setMetadata(LLVMContext::MD_prof, + MDNode::get(ProfileData->getContext(), Ops)); +} + BasicBlock *BranchInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } @@ -692,7 +833,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { return Amt; } -AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name, Instruction *InsertBefore) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { @@ -701,7 +842,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, setName(Name); } -AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name, BasicBlock *InsertAtEnd) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { @@ -710,7 +851,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, setName(Name); } -AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, +AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertBefore) { @@ -719,7 +860,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, setName(Name); } -AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, +AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertAtEnd) { @@ -728,7 +869,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, setName(Name); } -AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align, const Twine &Name, Instruction *InsertBefore) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { @@ -737,7 +878,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, setName(Name); } -AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align, const Twine &Name, BasicBlock *InsertAtEnd) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { @@ -787,6 +928,8 @@ bool AllocaInst::isStaticAlloca() const { void LoadInst::AssertOK() { assert(getOperand(0)->getType()->isPointerTy() && "Ptr must have pointer type."); + assert(!(isAtomic() && getAlignment() == 0) && + "Alignment required for atomic load"); } LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef) @@ -794,6 +937,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef) Load, Ptr, InsertBef) { setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); setName(Name); } @@ -803,6 +947,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE) Load, Ptr, InsertAE) { setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); setName(Name); } @@ -813,6 +958,18 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, Load, Ptr, InsertBef) { setVolatile(isVolatile); setAlignment(0); + setAtomic(NotAtomic); + AssertOK(); + setName(Name); +} + +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, + BasicBlock *InsertAE) + : 
UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), + Load, Ptr, InsertAE) { + setVolatile(isVolatile); + setAlignment(0); + setAtomic(NotAtomic); AssertOK(); setName(Name); } @@ -823,6 +980,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, Load, Ptr, InsertBef) { setVolatile(isVolatile); setAlignment(Align); + setAtomic(NotAtomic); AssertOK(); setName(Name); } @@ -833,27 +991,43 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, Load, Ptr, InsertAE) { setVolatile(isVolatile); setAlignment(Align); + setAtomic(NotAtomic); AssertOK(); setName(Name); } -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, + unsigned Align, AtomicOrdering Order, + SynchronizationScope SynchScope, + Instruction *InsertBef) + : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), + Load, Ptr, InsertBef) { + setVolatile(isVolatile); + setAlignment(Align); + setAtomic(Order, SynchScope); + AssertOK(); + setName(Name); +} + +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, + unsigned Align, AtomicOrdering Order, + SynchronizationScope SynchScope, BasicBlock *InsertAE) : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), Load, Ptr, InsertAE) { setVolatile(isVolatile); - setAlignment(0); + setAlignment(Align); + setAtomic(Order, SynchScope); AssertOK(); setName(Name); } - - LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef) : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), Load, Ptr, InsertBef) { setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); if (Name && Name[0]) setName(Name); } @@ -863,6 +1037,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE) Load, Ptr, InsertAE) { setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); if (Name && Name[0]) setName(Name); } @@ -873,6 +1048,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile, Load, Ptr, InsertBef) { setVolatile(isVolatile); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); if (Name && Name[0]) setName(Name); } @@ -883,6 +1059,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile, Load, Ptr, InsertAE) { setVolatile(isVolatile); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); if (Name && Name[0]) setName(Name); } @@ -891,7 +1068,7 @@ void LoadInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); assert(Align <= MaximumAlignment && "Alignment is greater than MaximumAlignment!"); - setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | + setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | ((Log2_32(Align)+1)<<1)); assert(getAlignment() == Align && "Alignment representation error!"); } @@ -907,6 +1084,8 @@ void StoreInst::AssertOK() { assert(getOperand(0)->getType() == cast<PointerType>(getOperand(1)->getType())->getElementType() && "Ptr must be a pointer to Val type!"); + assert(!(isAtomic() && getAlignment() == 0) && + "Alignment required for atomic load"); } @@ -919,6 +1098,7 @@ StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore) Op<1>() = addr; setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); } @@ -931,6 +1111,7 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd) Op<1>() = addr; setVolatile(false); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); } @@ -944,6 
+1125,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(0); + setAtomic(NotAtomic); AssertOK(); } @@ -957,6 +1139,37 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); + setAtomic(NotAtomic); + AssertOK(); +} + +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, + unsigned Align, AtomicOrdering Order, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits<StoreInst>::op_begin(this), + OperandTraits<StoreInst>::operands(this), + InsertBefore) { + Op<0>() = val; + Op<1>() = addr; + setVolatile(isVolatile); + setAlignment(Align); + setAtomic(Order, SynchScope); + AssertOK(); +} + +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, + BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits<StoreInst>::op_begin(this), + OperandTraits<StoreInst>::operands(this), + InsertAtEnd) { + Op<0>() = val; + Op<1>() = addr; + setVolatile(isVolatile); + setAlignment(0); + setAtomic(NotAtomic); AssertOK(); } @@ -970,10 +1183,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); + setAtomic(NotAtomic); AssertOK(); } StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, + unsigned Align, AtomicOrdering Order, + SynchronizationScope SynchScope, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits<StoreInst>::op_begin(this), @@ -982,7 +1198,8 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); - setAlignment(0); + setAlignment(Align); + setAtomic(Order, SynchScope); AssertOK(); } @@ -990,37 +1207,135 @@ void StoreInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); assert(Align <= MaximumAlignment && "Alignment is greater than MaximumAlignment!"); - setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | + setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | ((Log2_32(Align)+1) << 1)); assert(getAlignment() == Align && "Alignment representation error!"); } //===----------------------------------------------------------------------===// -// GetElementPtrInst Implementation +// AtomicCmpXchgInst Implementation //===----------------------------------------------------------------------===// -static unsigned retrieveAddrSpace(const Value *Val) { - return cast<PointerType>(Val->getType())->getAddressSpace(); +void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + Op<0>() = Ptr; + Op<1>() = Cmp; + Op<2>() = NewVal; + setOrdering(Ordering); + setSynchScope(SynchScope); + + assert(getOperand(0) && getOperand(1) && getOperand(2) && + "All operands must be non-null!"); + assert(getOperand(0)->getType()->isPointerTy() && + "Ptr must have pointer type!"); + assert(getOperand(1)->getType() == + cast<PointerType>(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to Cmp type!"); + assert(getOperand(2)->getType() == + cast<PointerType>(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to NewVal type!"); + assert(Ordering != NotAtomic && + "AtomicCmpXchg instructions must be atomic!"); +} + 
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Cmp->getType(), AtomicCmpXchg, + OperandTraits<AtomicCmpXchgInst>::op_begin(this), + OperandTraits<AtomicCmpXchgInst>::operands(this), + InsertBefore) { + Init(Ptr, Cmp, NewVal, Ordering, SynchScope); } -void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx, - const Twine &Name) { - assert(NumOperands == 1+NumIdx && "NumOperands not initialized?"); - Use *OL = OperandList; - OL[0] = Ptr; +AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd) + : Instruction(Cmp->getType(), AtomicCmpXchg, + OperandTraits<AtomicCmpXchgInst>::op_begin(this), + OperandTraits<AtomicCmpXchgInst>::operands(this), + InsertAtEnd) { + Init(Ptr, Cmp, NewVal, Ordering, SynchScope); +} + +//===----------------------------------------------------------------------===// +// AtomicRMWInst Implementation +//===----------------------------------------------------------------------===// - for (unsigned i = 0; i != NumIdx; ++i) - OL[i+1] = Idx[i]; +void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + Op<0>() = Ptr; + Op<1>() = Val; + setOperation(Operation); + setOrdering(Ordering); + setSynchScope(SynchScope); - setName(Name); + assert(getOperand(0) && getOperand(1) && + "All operands must be non-null!"); + assert(getOperand(0)->getType()->isPointerTy() && + "Ptr must have pointer type!"); + assert(getOperand(1)->getType() == + cast<PointerType>(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to Val type!"); + assert(Ordering != NotAtomic && + "AtomicRMW instructions must be atomic!"); } -void GetElementPtrInst::init(Value *Ptr, Value *Idx, const Twine &Name) { - assert(NumOperands == 2 && "NumOperands not initialized?"); - Use *OL = OperandList; - OL[0] = Ptr; - OL[1] = Idx; +AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Val->getType(), AtomicRMW, + OperandTraits<AtomicRMWInst>::op_begin(this), + OperandTraits<AtomicRMWInst>::operands(this), + InsertBefore) { + Init(Operation, Ptr, Val, Ordering, SynchScope); +} + +AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd) + : Instruction(Val->getType(), AtomicRMW, + OperandTraits<AtomicRMWInst>::op_begin(this), + OperandTraits<AtomicRMWInst>::operands(this), + InsertAtEnd) { + Init(Operation, Ptr, Val, Ordering, SynchScope); +} + +//===----------------------------------------------------------------------===// +// FenceInst Implementation +//===----------------------------------------------------------------------===// + +FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) { + setOrdering(Ordering); + setSynchScope(SynchScope); +} +FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, + SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) { + setOrdering(Ordering); + setSynchScope(SynchScope); +} + 
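// --- Illustrative sketch (not from the change above) -----------------------
// A hedged example of building the new atomic instructions with the
// constructors defined above.  It appends to an existing BasicBlock *BB and
// assumes the caller already has an i32* value Ptr and an i32 value Val in
// scope; those names and the chosen orderings are illustrative only.
#include "llvm/Instructions.h"
#include "llvm/BasicBlock.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

static void emitAtomicOps(BasicBlock *BB, Value *Ptr, Value *Val) {
  LLVMContext &Ctx = BB->getContext();
  // Atomic load: note that AssertOK() above requires a non-zero alignment.
  new LoadInst(Ptr, "old", /*isVolatile=*/false, /*Align=*/4,
               Acquire, CrossThread, BB);
  // Read-modify-write: atomically add Val to *Ptr.
  new AtomicRMWInst(AtomicRMWInst::Add, Ptr, Val, Monotonic, CrossThread, BB);
  // Compare-and-swap: if *Ptr == Val, store Val back (degenerate, but legal).
  new AtomicCmpXchgInst(Ptr, Val, Val, SequentiallyConsistent, CrossThread, BB);
  // Stand-alone memory fence.
  new FenceInst(Ctx, SequentiallyConsistent, CrossThread, BB);
}
// ---------------------------------------------------------------------------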
+//===----------------------------------------------------------------------===// +// GetElementPtrInst Implementation +//===----------------------------------------------------------------------===// + +void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList, + const Twine &Name) { + assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?"); + OperandList[0] = Ptr; + std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1); setName(Name); } @@ -1029,34 +1344,10 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) OperandTraits<GetElementPtrInst>::op_end(this) - GEPI.getNumOperands(), GEPI.getNumOperands()) { - Use *OL = OperandList; - Use *GEPIOL = GEPI.OperandList; - for (unsigned i = 0, E = NumOperands; i != E; ++i) - OL[i] = GEPIOL[i]; + std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin()); SubclassOptionalData = GEPI.SubclassOptionalData; } -GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, - const Twine &Name, Instruction *InBe) - : Instruction(PointerType::get( - checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)), - GetElementPtr, - OperandTraits<GetElementPtrInst>::op_end(this) - 2, - 2, InBe) { - init(Ptr, Idx, Name); -} - -GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, - const Twine &Name, BasicBlock *IAE) - : Instruction(PointerType::get( - checkGEPType(getIndexedType(Ptr->getType(),Idx)), - retrieveAddrSpace(Ptr)), - GetElementPtr, - OperandTraits<GetElementPtrInst>::op_end(this) - 2, - 2, IAE) { - init(Ptr, Idx, Name); -} - /// getIndexedType - Returns the type of the element that would be accessed with /// a gep instruction with the specified parameters. /// @@ -1067,14 +1358,13 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, /// pointer type. /// template <typename IndexTy> -static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, - unsigned NumIdx) { - const PointerType *PTy = dyn_cast<PointerType>(Ptr); +static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { + PointerType *PTy = dyn_cast<PointerType>(Ptr); if (!PTy) return 0; // Type isn't a pointer type! Type *Agg = PTy->getElementType(); // Handle the special case of the empty set index set, which is always valid. - if (NumIdx == 0) + if (IdxList.empty()) return Agg; // If there is at least one index, the top level type must be sized, otherwise @@ -1083,44 +1373,29 @@ static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, return 0; unsigned CurIdx = 1; - for (; CurIdx != NumIdx; ++CurIdx) { + for (; CurIdx != IdxList.size(); ++CurIdx) { CompositeType *CT = dyn_cast<CompositeType>(Agg); if (!CT || CT->isPointerTy()) return 0; - IndexTy Index = Idxs[CurIdx]; + IndexTy Index = IdxList[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); } - return CurIdx == NumIdx ? Agg : 0; + return CurIdx == IdxList.size() ? 
Agg : 0; } -Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs, - unsigned NumIdx) { - return getIndexedTypeInternal(Ptr, Idxs, NumIdx); +Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) { + return getIndexedTypeInternal(Ptr, IdxList); } -Type *GetElementPtrInst::getIndexedType(const Type *Ptr, - Constant* const *Idxs, - unsigned NumIdx) { - return getIndexedTypeInternal(Ptr, Idxs, NumIdx); +Type *GetElementPtrInst::getIndexedType(Type *Ptr, + ArrayRef<Constant *> IdxList) { + return getIndexedTypeInternal(Ptr, IdxList); } -Type *GetElementPtrInst::getIndexedType(const Type *Ptr, - uint64_t const *Idxs, - unsigned NumIdx) { - return getIndexedTypeInternal(Ptr, Idxs, NumIdx); +Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) { + return getIndexedTypeInternal(Ptr, IdxList); } -Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) { - const PointerType *PTy = dyn_cast<PointerType>(Ptr); - if (!PTy) return 0; // Type isn't a pointer type! - - // Check the pointer index. - if (!PTy->indexValid(Idx)) return 0; - - return PTy->getElementType(); -} - - /// hasAllZeroIndices - Return true if all of the indices of this GEP are /// zeros. If so, the result pointer and the first operand have the same /// value, just potentially different types. @@ -1286,13 +1561,13 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) return false; - const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); + VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32)) return false; // Check to see if Mask is valid. if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) { - const VectorType *VTy = cast<VectorType>(V1->getType()); + VectorType *VTy = cast<VectorType>(V1->getType()); for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) { if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) { if (CI->uge(VTy->getNumElements()*2)) @@ -1382,7 +1657,7 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI) // A null type is returned if the indices are invalid for the specified // pointer type. // -Type *ExtractValueInst::getIndexedType(const Type *Agg, +Type *ExtractValueInst::getIndexedType(Type *Agg, ArrayRef<unsigned> Idxs) { for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) { unsigned Index = Idxs[CurIdx]; @@ -1392,10 +1667,10 @@ Type *ExtractValueInst::getIndexedType(const Type *Agg, // insertvalue we need to check array indexing manually. // Since the only other types we can index into are struct types it's just // as easy to check those manually as well. 
- if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) { + if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) { if (Index >= AT->getNumElements()) return 0; - } else if (const StructType *ST = dyn_cast<StructType>(Agg)) { + } else if (StructType *ST = dyn_cast<StructType>(Agg)) { if (Index >= ST->getNumElements()) return 0; } else { @@ -1413,7 +1688,7 @@ Type *ExtractValueInst::getIndexedType(const Type *Agg, //===----------------------------------------------------------------------===// BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, - const Type *Ty, const Twine &Name, + Type *Ty, const Twine &Name, Instruction *InsertBefore) : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), @@ -1426,7 +1701,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, } BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, - const Type *Ty, const Twine &Name, + Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), @@ -1589,7 +1864,7 @@ BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, Instruction *InsertBefore) { Constant *C; - if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) { + if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) { C = Constant::getAllOnesValue(PTy->getElementType()); C = ConstantVector::get( std::vector<Constant*>(PTy->getNumElements(), C)); @@ -1604,7 +1879,7 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, BasicBlock *InsertAtEnd) { Constant *AllOnes; - if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) { + if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) { // Create a vector of all ones values. Constant *Elt = Constant::getAllOnesValue(PTy->getElementType()); AllOnes = ConstantVector::get( @@ -1743,8 +2018,8 @@ bool CastInst::isLosslessCast() const { return false; // Identity cast is always lossless - const Type* SrcTy = getOperand(0)->getType(); - const Type* DstTy = getType(); + Type* SrcTy = getOperand(0)->getType(); + Type* DstTy = getType(); if (SrcTy == DstTy) return true; @@ -1763,12 +2038,12 @@ bool CastInst::isLosslessCast() const { /// # ptrtoint i32* %x to i32 ; on 32-bit plaforms only /// @brief Determine if the described cast is a no-op. bool CastInst::isNoopCast(Instruction::CastOps Opcode, - const Type *SrcTy, - const Type *DestTy, - const Type *IntPtrTy) { + Type *SrcTy, + Type *DestTy, + Type *IntPtrTy) { switch (Opcode) { default: - assert(!"Invalid CastOp"); + assert(0 && "Invalid CastOp"); case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -1791,7 +2066,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode, } /// @brief Determine if a cast is a no-op. -bool CastInst::isNoopCast(const Type *IntPtrTy) const { +bool CastInst::isNoopCast(Type *IntPtrTy) const { return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); } @@ -1805,8 +2080,7 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const { /// If no such cast is permited, the function returns 0. 
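// --- Illustrative sketch (not from the change above) -----------------------
// A hedged example of the isNoopCast() query shown above: a ptrtoint is a
// no-op only when the destination integer is pointer-sized, so the caller has
// to supply the target's intptr type.  Obtaining it from TargetData, and the
// helper's name, are assumptions for the sake of the example.
#include "llvm/Instructions.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

static bool ptrToIntIsNoop(const PtrToIntInst *PTI, const TargetData &TD) {
  Type *IntPtrTy = TD.getIntPtrType(PTI->getType()->getContext());
  // Equivalent to the static form:
  //   CastInst::isNoopCast(Instruction::PtrToInt, SrcTy, DstTy, IntPtrTy)
  return PTI->isNoopCast(IntPtrTy);
}
// ---------------------------------------------------------------------------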
unsigned CastInst::isEliminableCastPair( Instruction::CastOps firstOp, Instruction::CastOps secondOp, - const Type *SrcTy, const Type *MidTy, const Type *DstTy, const Type *IntPtrTy) -{ + Type *SrcTy, Type *MidTy, Type *DstTy, Type *IntPtrTy) { // Define the 144 possibilities for these two cast instructions. The values // in this matrix determine what to do in a given situation and select the // case in the switch below. The rows correspond to firstOp, the columns @@ -1859,12 +2133,16 @@ unsigned CastInst::isEliminableCastPair( }; // If either of the casts are a bitcast from scalar to vector, disallow the - // merging. - if ((firstOp == Instruction::BitCast && - isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) || - (secondOp == Instruction::BitCast && - isa<VectorType>(MidTy) != isa<VectorType>(DstTy))) - return 0; // Disallowed + // merging. However, bitcast of A->B->A are allowed. + bool isFirstBitcast = (firstOp == Instruction::BitCast); + bool isSecondBitcast = (secondOp == Instruction::BitCast); + bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast); + + // Check if any of the bitcasts convert scalars<->vectors. + if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) || + (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy))) + // Unless we are bitcasing to the original type, disallow optimizations. + if (!chainedBitcast) return 0; int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin] [secondOp-Instruction::CastOpsBegin]; @@ -1958,16 +2236,16 @@ unsigned CastInst::isEliminableCastPair( case 99: // cast combination can't happen (error in input). This is for all cases // where the MidTy is not the same for the two cast instructions. - assert(!"Invalid Cast Combination"); + assert(0 && "Invalid Cast Combination"); return 0; default: - assert(!"Error in CastResults table!!!"); + assert(0 && "Error in CastResults table!!!"); return 0; } return 0; } -CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, +CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(castIsValid(op, S, Ty) && "Invalid cast!"); // Construct and return the appropriate CastInst subclass @@ -1985,12 +2263,12 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore); case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore); default: - assert(!"Invalid opcode provided"); + assert(0 && "Invalid opcode provided"); } return 0; } -CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, +CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { assert(castIsValid(op, S, Ty) && "Invalid cast!"); // Construct and return the appropriate CastInst subclass @@ -2008,12 +2286,12 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd); case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd); default: - assert(!"Invalid opcode provided"); + assert(0 && "Invalid opcode provided"); } return 0; } -CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2021,7 +2299,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value 
*S, const Type *Ty, return Create(Instruction::ZExt, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2029,7 +2307,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2037,7 +2315,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, return Create(Instruction::SExt, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2045,7 +2323,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2053,7 +2331,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, return Create(Instruction::Trunc, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, +CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2061,7 +2339,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, +CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { assert(S->getType()->isPointerTy() && "Invalid cast"); @@ -2074,7 +2352,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, } /// @brief Create a BitCast or a PtrToInt cast instruction -CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, +CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(S->getType()->isPointerTy() && "Invalid cast"); @@ -2086,7 +2364,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, +CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && @@ -2100,7 +2378,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, return Create(opcode, C, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, +CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && @@ -2114,7 +2392,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type 
*Ty, return Create(opcode, C, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, +CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && @@ -2127,7 +2405,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, return Create(opcode, C, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, +CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && @@ -2142,15 +2420,15 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, // Check whether it is valid to call getCastOpcode for these types. // This routine must be kept in sync with getCastOpcode. -bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { +bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType()) return false; if (SrcTy == DestTy) return true; - if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) - if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy)) + if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) + if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy)) if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { // An element by element cast. Valid if casting the elements is valid. SrcTy = SrcVecTy->getElementType(); @@ -2212,8 +2490,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { // This routine must be kept in sync with isCastable. Instruction::CastOps CastInst::getCastOpcode( - const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) { - const Type *SrcTy = Src->getType(); + const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) { + Type *SrcTy = Src->getType(); assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() && "Only first class types are castable!"); @@ -2221,8 +2499,8 @@ CastInst::getCastOpcode( if (SrcTy == DestTy) return BitCast; - if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) - if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy)) + if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) + if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy)) if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { // An element by element cast. Find the appropriate opcode based on the // element types. @@ -2292,17 +2570,17 @@ CastInst::getCastOpcode( } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr } else { - assert(!"Casting pointer to other than pointer or int"); + assert(0 && "Casting pointer to other than pointer or int"); } } else if (DestTy->isX86_MMXTy()) { if (SrcTy->isVectorTy()) { assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX"); return BitCast; // 64-bit vector to MMX } else { - assert(!"Illegal cast to X86_MMX"); + assert(0 && "Illegal cast to X86_MMX"); } } else { - assert(!"Casting to type that is not first-class"); + assert(0 && "Casting to type that is not first-class"); } // If we fall through to here we probably hit an assertion cast above @@ -2320,10 +2598,10 @@ CastInst::getCastOpcode( /// it in one place and to eliminate the redundant code for getting the sizes /// of the types involved. 
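The Create*OrBitCast helpers and CreateIntegerCast above all follow the same width-driven rule: equal scalar widths degrade to a plain bitcast, a smaller destination truncates, and a larger one zero- or sign-extends depending on the requested signedness. A hedged sketch of that selection using plain bit widths instead of Type queries (the enum and function name are illustrative only):

#include <cassert>

enum CastKind { Bitcast, Trunc, ZExt, SExt };

// Chooses the opcode CreateIntegerCast would forward to Create(), based only
// on the source/destination scalar widths and the requested signedness.
static CastKind pickIntegerCast(unsigned srcBits, unsigned dstBits, bool isSigned) {
  if (srcBits == dstBits) return Bitcast; // same width: no value change needed
  if (srcBits > dstBits)  return Trunc;   // narrowing
  return isSigned ? SExt : ZExt;          // widening, signed or unsigned
}

int main() {
  assert(pickIntegerCast(32, 32, false) == Bitcast);
  assert(pickIntegerCast(64, 32, true)  == Trunc);
  assert(pickIntegerCast(16, 64, true)  == SExt);
  assert(pickIntegerCast(16, 64, false) == ZExt);
  return 0;
}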
bool -CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { +CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { // Check for type sanity on the arguments - const Type *SrcTy = S->getType(); + Type *SrcTy = S->getType(); if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() || SrcTy->isAggregateType() || DstTy->isAggregateType()) return false; @@ -2384,144 +2662,144 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { } TruncInst::TruncInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, Trunc, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc"); } TruncInst::TruncInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc"); } ZExtInst::ZExtInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, ZExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt"); } ZExtInst::ZExtInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt"); } SExtInst::SExtInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, SExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt"); } SExtInst::SExtInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, SExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt"); } FPTruncInst::FPTruncInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc"); } FPTruncInst::FPTruncInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc"); } FPExtInst::FPExtInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, FPExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt"); } FPExtInst::FPExtInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt"); } UIToFPInst::UIToFPInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, UIToFP, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP"); } UIToFPInst::UIToFPInst( - Value *S, const Type *Ty, const Twine 
&Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP"); } SIToFPInst::SIToFPInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, SIToFP, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP"); } SIToFPInst::SIToFPInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP"); } FPToUIInst::FPToUIInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, FPToUI, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI"); } FPToUIInst::FPToUIInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI"); } FPToSIInst::FPToSIInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, FPToSI, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI"); } FPToSIInst::FPToSIInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI"); } PtrToIntInst::PtrToIntInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt"); } PtrToIntInst::PtrToIntInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt"); } IntToPtrInst::IntToPtrInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr"); } IntToPtrInst::IntToPtrInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr"); } BitCastInst::BitCastInst( - Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore ) : CastInst(Ty, BitCast, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"); } BitCastInst::BitCastInst( - Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd ) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"); } @@ -2532,7 
+2810,7 @@ BitCastInst::BitCastInst( void CmpInst::Anchor() const {} -CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate, +CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, Value *LHS, Value *RHS, const Twine &Name, Instruction *InsertBefore) : Instruction(ty, op, @@ -2545,7 +2823,7 @@ CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate, setName(Name); } -CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate, +CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, Value *LHS, Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(ty, op, @@ -2612,7 +2890,7 @@ bool CmpInst::isEquality() const { CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { switch (pred) { - default: assert(!"Unknown cmp predicate!"); + default: assert(0 && "Unknown cmp predicate!"); case ICMP_EQ: return ICMP_NE; case ICMP_NE: return ICMP_EQ; case ICMP_UGT: return ICMP_ULE; @@ -2645,7 +2923,7 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { - default: assert(! "Unknown icmp predicate!"); + default: assert(0 && "Unknown icmp predicate!"); case ICMP_EQ: case ICMP_NE: case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE: return pred; @@ -2658,7 +2936,7 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) { switch (pred) { - default: assert(! "Unknown icmp predicate!"); + default: assert(0 && "Unknown icmp predicate!"); case ICMP_EQ: case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE: return pred; @@ -2734,7 +3012,7 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) { switch (pred) { - default: assert(!"Unknown cmp predicate!"); + default: assert(0 && "Unknown cmp predicate!"); case ICMP_EQ: case ICMP_NE: return pred; case ICMP_SGT: return ICMP_SLT; @@ -3065,14 +3343,34 @@ AllocaInst *AllocaInst::clone_impl() const { } LoadInst *LoadInst::clone_impl() const { - return new LoadInst(getOperand(0), - Twine(), isVolatile(), - getAlignment()); + return new LoadInst(getOperand(0), Twine(), isVolatile(), + getAlignment(), getOrdering(), getSynchScope()); } StoreInst *StoreInst::clone_impl() const { - return new StoreInst(getOperand(0), getOperand(1), - isVolatile(), getAlignment()); + return new StoreInst(getOperand(0), getOperand(1), isVolatile(), + getAlignment(), getOrdering(), getSynchScope()); + +} + +AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const { + AtomicCmpXchgInst *Result = + new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2), + getOrdering(), getSynchScope()); + Result->setVolatile(isVolatile()); + return Result; +} + +AtomicRMWInst *AtomicRMWInst::clone_impl() const { + AtomicRMWInst *Result = + new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1), + getOrdering(), getSynchScope()); + Result->setVolatile(isVolatile()); + return Result; +} + +FenceInst *FenceInst::clone_impl() const { + return new FenceInst(getContext(), getOrdering(), getSynchScope()); } TruncInst *TruncInst::clone_impl() const { @@ -3155,6 +3453,10 @@ PHINode *PHINode::clone_impl() const { return new PHINode(*this); } +LandingPadInst *LandingPadInst::clone_impl() const { + return new LandingPadInst(*this); +} + ReturnInst *ReturnInst::clone_impl() const { return new(getNumOperands()) ReturnInst(*this); } @@ -3176,6 
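The predicate helpers touched above (getInversePredicate, getSignedPredicate, getSwappedPredicate) are pure lookup switches over the predicate enum. A compact standalone sketch of the inversion mapping for the integer predicates, using a local enum rather than the real CmpInst::Predicate values:

#include <cassert>

// Standalone copy of the integer comparison predicates, for illustration only.
enum Pred { EQ, NE, UGT, ULE, ULT, UGE, SGT, SLE, SLT, SGE };

// Logical negation: "icmp P a, b" is true exactly when "icmp invert(P) a, b"
// is false, which is what getInversePredicate encodes.
static Pred invert(Pred P) {
  switch (P) {
  case EQ:  return NE;   case NE:  return EQ;
  case UGT: return ULE;  case ULE: return UGT;
  case ULT: return UGE;  case UGE: return ULT;
  case SGT: return SLE;  case SLE: return SGT;
  case SLT: return SGE;  case SGE: return SLT;
  }
  return EQ; // unreachable for valid input
}

int main() {
  assert(invert(SGT) == SLE);
  assert(invert(invert(ULT)) == ULT); // inversion is an involution
  return 0;
}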
+3478,10 @@ InvokeInst *InvokeInst::clone_impl() const { return new(getNumOperands()) InvokeInst(*this); } +ResumeInst *ResumeInst::clone_impl() const { + return new(1) ResumeInst(*this); +} + UnwindInst *UnwindInst::clone_impl() const { LLVMContext &Context = getContext(); return new UnwindInst(Context); diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.h b/contrib/llvm/lib/VMCore/LLVMContextImpl.h index 06a6f2a..a3f68fe 100644 --- a/contrib/llvm/lib/VMCore/LLVMContextImpl.h +++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.h @@ -42,8 +42,8 @@ class Value; struct DenseMapAPIntKeyInfo { struct KeyTy { APInt val; - const Type* type; - KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {} + Type* type; + KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {} KeyTy(const KeyTy& that) : val(that.val), type(that.type) {} bool operator==(const KeyTy& that) const { return type == that.type && this->val == that.val; diff --git a/contrib/llvm/lib/VMCore/Module.cpp b/contrib/llvm/lib/VMCore/Module.cpp index be2fcb8..c29029b 100644 --- a/contrib/llvm/lib/VMCore/Module.cpp +++ b/contrib/llvm/lib/VMCore/Module.cpp @@ -32,25 +32,10 @@ using namespace llvm; // Methods to implement the globals and functions lists. // -GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() { - GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()), - false, GlobalValue::ExternalLinkage); - // This should not be garbage monitored. - LeakDetector::removeGarbageObject(Ret); - return Ret; -} -GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() { - GlobalAlias *Ret = new GlobalAlias(Type::getInt32Ty(getGlobalContext()), - GlobalValue::ExternalLinkage); - // This should not be garbage monitored. - LeakDetector::removeGarbageObject(Ret); - return Ret; -} - // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file. -template class llvm::SymbolTableListTraits<GlobalVariable, Module>; template class llvm::SymbolTableListTraits<Function, Module>; +template class llvm::SymbolTableListTraits<GlobalVariable, Module>; template class llvm::SymbolTableListTraits<GlobalAlias, Module>; //===----------------------------------------------------------------------===// @@ -82,8 +67,10 @@ Module::Endianness Module::getEndianness() const { Module::Endianness ret = AnyEndianness; while (!temp.empty()) { - StringRef token = DataLayout; - tie(token, temp) = getToken(temp, "-"); + std::pair<StringRef, StringRef> P = getToken(temp, "-"); + + StringRef token = P.first; + temp = P.second; if (token[0] == 'e') { ret = LittleEndian; @@ -95,15 +82,16 @@ Module::Endianness Module::getEndianness() const { return ret; } -/// Target Pointer Size information... +/// Target Pointer Size information. Module::PointerSize Module::getPointerSize() const { StringRef temp = DataLayout; Module::PointerSize ret = AnyPointerSize; while (!temp.empty()) { - StringRef token, signalToken; - tie(token, temp) = getToken(temp, "-"); - tie(signalToken, token) = getToken(token, ":"); + std::pair<StringRef, StringRef> TmpP = getToken(temp, "-"); + temp = TmpP.second; + TmpP = getToken(TmpP.first, ":"); + StringRef token = TmpP.second, signalToken = TmpP.first; if (signalToken[0] == 'p') { int size = 0; @@ -149,7 +137,7 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const { // the symbol table directly for this common task. 
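getEndianness and getPointerSize now walk the data-layout string with explicit getToken pairs instead of tie(), but the parse itself is unchanged: split on '-', and split the pointer record again on ':'. A simplified standalone version of the same scan, using plain std::string handling rather than the StringRef API:

#include <cassert>
#include <string>

// Scans a data-layout-style string ("e-p:32:32-i64:64") and reports the
// pointer size in bits, or 0 if no "p:" record is present. Simplified model
// of Module::getPointerSize; the real code works on StringRef tokens.
static unsigned pointerSizeBits(const std::string &DL) {
  size_t Pos = 0;
  while (Pos < DL.size()) {
    size_t Dash = DL.find('-', Pos);
    std::string Tok = DL.substr(Pos, Dash == std::string::npos ? std::string::npos
                                                               : Dash - Pos);
    if (!Tok.empty() && Tok[0] == 'p') {
      size_t Colon = Tok.find(':');
      if (Colon != std::string::npos)
        return (unsigned)std::stoul(Tok.substr(Colon + 1));
    }
    if (Dash == std::string::npos) break;
    Pos = Dash + 1;
  }
  return 0;
}

int main() {
  assert(pointerSizeBits("e-p:32:32-i64:64") == 32);
  assert(pointerSizeBits("E-p:64:64") == 64);
  assert(pointerSizeBits("e-i64:64") == 0);
  return 0;
}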
// Constant *Module::getOrInsertFunction(StringRef Name, - const FunctionType *Ty, + FunctionType *Ty, AttrListPtr AttributeList) { // See if we have a definition for the specified function already. GlobalValue *F = getNamedValue(Name); @@ -182,7 +170,7 @@ Constant *Module::getOrInsertFunction(StringRef Name, } Constant *Module::getOrInsertTargetIntrinsic(StringRef Name, - const FunctionType *Ty, + FunctionType *Ty, AttrListPtr AttributeList) { // See if we have a definition for the specified function already. GlobalValue *F = getNamedValue(Name); @@ -199,7 +187,7 @@ Constant *Module::getOrInsertTargetIntrinsic(StringRef Name, } Constant *Module::getOrInsertFunction(StringRef Name, - const FunctionType *Ty) { + FunctionType *Ty) { AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0); return getOrInsertFunction(Name, Ty, AttributeList); } @@ -211,7 +199,7 @@ Constant *Module::getOrInsertFunction(StringRef Name, // Constant *Module::getOrInsertFunction(StringRef Name, AttrListPtr AttributeList, - const Type *RetTy, ...) { + Type *RetTy, ...) { va_list Args; va_start(Args, RetTy); @@ -229,7 +217,7 @@ Constant *Module::getOrInsertFunction(StringRef Name, } Constant *Module::getOrInsertFunction(StringRef Name, - const Type *RetTy, ...) { + Type *RetTy, ...) { va_list Args; va_start(Args, RetTy); @@ -279,7 +267,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, /// with a constantexpr cast to the right type. /// 3. Finally, if the existing global is the correct delclaration, return the /// existing global. -Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) { +Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) { // See if we have a definition for the specified global already. GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)); if (GV == 0) { @@ -436,7 +424,7 @@ namespace { // To avoid walking constant expressions multiple times and other IR // objects, we keep several helper maps. DenseSet<const Value*> VisitedConstants; - DenseSet<const Type*> VisitedTypes; + DenseSet<Type*> VisitedTypes; std::vector<StructType*> &StructTypes; public: @@ -549,5 +537,3 @@ namespace { void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const { TypeFinder(StructTypes).run(*this); } - - diff --git a/contrib/llvm/lib/VMCore/PassManager.cpp b/contrib/llvm/lib/VMCore/PassManager.cpp index 5cf2905..ecedb1d 100644 --- a/contrib/llvm/lib/VMCore/PassManager.cpp +++ b/contrib/llvm/lib/VMCore/PassManager.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Mutex.h" #include "llvm/ADT/StringMap.h" #include <algorithm> -#include <cstdio> #include <map> using namespace llvm; @@ -167,8 +166,8 @@ class BBPassManager : public PMDataManager, public FunctionPass { public: static char ID; - explicit BBPassManager(int Depth) - : PMDataManager(Depth), FunctionPass(ID) {} + explicit BBPassManager() + : PMDataManager(), FunctionPass(ID) {} /// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. 
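getOrInsertFunction keeps its three-way behavior: hand back an existing function, create a fresh declaration, or wrap a type-mismatched existing global in a constantexpr cast, which is why it returns Constant* rather than Function*. A hedged usage sketch; the header paths and the Type convenience getters are assumed from LLVM of this vintage (pre llvm/IR/ reorganization) and are not verified against a build:

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
#include <vector>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // Declare (or fetch) "i32 puts(i8*)". The result is a Constant* because an
  // existing global of a different type comes back wrapped in a bitcast.
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  std::vector<Type*> Params(1, Type::getInt8PtrTy(Ctx));
  FunctionType *PutsTy = FunctionType::get(Int32Ty, Params, /*isVarArg=*/false);
  Constant *Puts = M.getOrInsertFunction("puts", PutsTy);
  (void)Puts;
  return 0;
}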
@@ -193,7 +192,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n"; + llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); BP->dumpPassStructure(Offset + 1); @@ -228,9 +227,9 @@ private: bool wasRun; public: static char ID; - explicit FunctionPassManagerImpl(int Depth) : - Pass(PT_PassManager, ID), PMDataManager(Depth), - PMTopLevelManager(new FPPassManager(1)), wasRun(false) {} + explicit FunctionPassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new FPPassManager()), wasRun(false) {} /// add - Add a pass to the queue of passes to run. This passes ownership of /// the Pass to the PassManager. When the PassManager is destroyed, the pass @@ -303,8 +302,8 @@ char FunctionPassManagerImpl::ID = 0; class MPPassManager : public Pass, public PMDataManager { public: static char ID; - explicit MPPassManager(int Depth) : - Pass(PT_PassManager, ID), PMDataManager(Depth) { } + explicit MPPassManager() : + Pass(PT_PassManager, ID), PMDataManager() { } // Delete on the fly managers. virtual ~MPPassManager() { @@ -349,7 +348,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n"; + llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); MP->dumpPassStructure(Offset + 1); @@ -388,9 +387,9 @@ class PassManagerImpl : public Pass, public: static char ID; - explicit PassManagerImpl(int Depth) : - Pass(PT_PassManager, ID), PMDataManager(Depth), - PMTopLevelManager(new MPPassManager(1)) {} + explicit PassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new MPPassManager()) {} /// add - Add a pass to the queue of passes to run. This passes ownership of /// the Pass to the PassManager. When the PassManager is destroyed, the pass @@ -1340,7 +1339,7 @@ bool BBPassManager::doFinalization(Function &F) { /// Create new Function pass manager FunctionPassManager::FunctionPassManager(Module *m) : M(m) { - FPM = new FunctionPassManagerImpl(0); + FPM = new FunctionPassManagerImpl(); // FPM is the top level manager. FPM->setTopLevelManager(FPM); @@ -1532,7 +1531,7 @@ bool FPPassManager::runOnModule(Module &M) { bool Changed = doInitialization(M); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - runOnFunction(*I); + Changed |= runOnFunction(*I); return doFinalization(M) || Changed; } @@ -1626,7 +1625,7 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { FunctionPassManagerImpl *FPP = OnTheFlyManagers[P]; if (!FPP) { - FPP = new FunctionPassManagerImpl(0); + FPP = new FunctionPassManagerImpl(); // FPP is the top level manager. FPP->setTopLevelManager(FPP); @@ -1635,9 +1634,11 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { FPP->add(RequiredPass); // Register P as the last user of RequiredPass. 
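One functional fix hides among the mechanical edits above: FPPassManager::runOnModule previously discarded the return value of runOnFunction, so a module could be reported as unmodified even when function passes changed it. The corrected accumulation pattern, shown in isolation with plain ints standing in for functions:

#include <cassert>
#include <vector>

// Stand-in for running one per-function pipeline; returns true if it made a
// change. Here: "modify" every odd element.
static bool runOnFunction(int &F) {
  if (F % 2 == 0) return false;
  F += 1;
  return true;
}

static bool runOnModule(std::vector<int> &M) {
  bool Changed = false;
  for (size_t i = 0, e = M.size(); i != e; ++i)
    Changed |= runOnFunction(M[i]);   // the fix: OR in every per-function result
  return Changed;
}

int main() {
  std::vector<int> M1;
  M1.push_back(2); M1.push_back(3); M1.push_back(4);
  assert(runOnModule(M1));            // element 3 was modified
  std::vector<int> M2;
  M2.push_back(2); M2.push_back(4);
  assert(!runOnModule(M2));           // nothing changed
  return 0;
}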
- SmallVector<Pass *, 1> LU; - LU.push_back(RequiredPass); - FPP->setLastUser(LU, P); + if (RequiredPass) { + SmallVector<Pass *, 1> LU; + LU.push_back(RequiredPass); + FPP->setLastUser(LU, P); + } } /// Return function pass corresponding to PassInfo PI, that is @@ -1677,7 +1678,7 @@ bool PassManagerImpl::run(Module &M) { /// Create new pass manager PassManager::PassManager() { - PM = new PassManagerImpl(0); + PM = new PassManagerImpl(); // PM is the top level manager PM->setTopLevelManager(PM); } @@ -1761,13 +1762,23 @@ void PMStack::pop() { // Push PM on the stack and set its top level manager. void PMStack::push(PMDataManager *PM) { assert(PM && "Unable to push. Pass Manager expected"); + assert(PM->getDepth()==0 && "Pass Manager depth set too early"); if (!this->empty()) { + assert(PM->getPassManagerType() > this->top()->getPassManagerType() + && "pushing bad pass manager to PMStack"); PMTopLevelManager *TPM = this->top()->getTopLevelManager(); assert(TPM && "Unable to find top level manager"); TPM->addIndirectPassManager(PM); PM->setTopLevelManager(TPM); + PM->setDepth(this->top()->getDepth()+1); + } + else { + assert((PM->getPassManagerType() == PMT_ModulePassManager + || PM->getPassManagerType() == PMT_FunctionPassManager) + && "pushing bad pass manager to PMStack"); + PM->setDepth(1); } S.push_back(PM); @@ -1777,10 +1788,10 @@ void PMStack::push(PMDataManager *PM) { void PMStack::dump() const { for (std::vector<PMDataManager *>::const_iterator I = S.begin(), E = S.end(); I != E; ++I) - printf("%s ", (*I)->getAsPass()->getPassName()); + dbgs() << (*I)->getAsPass()->getPassName() << ' '; if (!S.empty()) - printf("\n"); + dbgs() << '\n'; } /// Find appropriate Module Pass Manager in the PM Stack and @@ -1823,7 +1834,7 @@ void FunctionPass::assignPassManager(PMStack &PMS, PMDataManager *PMD = PMS.top(); // [1] Create new Function Pass Manager - FPP = new FPPassManager(PMD->getDepth() + 1); + FPP = new FPPassManager(); FPP->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager @@ -1860,7 +1871,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS, PMDataManager *PMD = PMS.top(); // [1] Create new Basic Block Manager - BBP = new BBPassManager(PMD->getDepth() + 1); + BBP = new BBPassManager(); // [2] Set up new manager's top level manager // Basic Block Pass Manager does not live by itself diff --git a/contrib/llvm/lib/VMCore/PassRegistry.cpp b/contrib/llvm/lib/VMCore/PassRegistry.cpp index fa92620..2df6557 100644 --- a/contrib/llvm/lib/VMCore/PassRegistry.cpp +++ b/contrib/llvm/lib/VMCore/PassRegistry.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Function.h" #include <vector> using namespace llvm; diff --git a/contrib/llvm/lib/VMCore/Type.cpp b/contrib/llvm/lib/VMCore/Type.cpp index f874d1b..10184bc 100644 --- a/contrib/llvm/lib/VMCore/Type.cpp +++ b/contrib/llvm/lib/VMCore/Type.cpp @@ -40,8 +40,8 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { /// getScalarType - If this is a vector type, return the element type, /// otherwise return this. -const Type *Type::getScalarType() const { - if (const VectorType *VTy = dyn_cast<VectorType>(this)) +Type *Type::getScalarType() { + if (VectorType *VTy = dyn_cast<VectorType>(this)) return VTy->getElementType(); return this; } @@ -77,7 +77,7 @@ bool Type::isFPOrFPVectorTy() const { // canLosslesslyBitCastTo - Return true if this type can be converted to // 'Ty' without any reinterpretation of bits. 
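Pass manager depth is no longer threaded through every constructor; PMStack::push now derives it from the stack itself (top depth plus one, or one for the first manager) and asserts the nesting order. A simplified model of that push, with a plain struct in place of PMDataManager:

#include <cassert>
#include <vector>

struct Manager {
  unsigned Depth;   // 0 means "not yet pushed", as the new assert expects
  int Kind;         // stand-in for getPassManagerType()
};

struct Stack {
  std::vector<Manager*> S;

  void push(Manager *PM) {
    assert(PM->Depth == 0 && "Pass Manager depth set too early");
    if (!S.empty()) {
      assert(PM->Kind > S.back()->Kind && "pushing bad pass manager to PMStack");
      PM->Depth = S.back()->Depth + 1;   // nested manager: one level deeper
    } else {
      PM->Depth = 1;                     // first manager on the stack
    }
    S.push_back(PM);
  }
};

int main() {
  Manager MPM = {0, 1}, FPM = {0, 2};    // module manager, then function manager
  Stack PMS;
  PMS.push(&MPM);
  PMS.push(&FPM);
  assert(MPM.Depth == 1 && FPM.Depth == 2);
  return 0;
}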
For example, i8* to i32*. // -bool Type::canLosslesslyBitCastTo(const Type *Ty) const { +bool Type::canLosslesslyBitCastTo(Type *Ty) const { // Identity cast means no change so return true if (this == Ty) return true; @@ -146,7 +146,7 @@ unsigned Type::getPrimitiveSizeInBits() const { /// getScalarSizeInBits - If this is a vector type, return the /// getPrimitiveSizeInBits value for the element type. Otherwise return the /// getPrimitiveSizeInBits value for this type. -unsigned Type::getScalarSizeInBits() const { +unsigned Type::getScalarSizeInBits() { return getScalarType()->getPrimitiveSizeInBits(); } @@ -306,7 +306,7 @@ APInt IntegerType::getMask() const { // FunctionType Implementation //===----------------------------------------------------------------------===// -FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params, +FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs) : Type(Result->getContext(), FunctionTyID) { Type **SubTys = reinterpret_cast<Type**>(this+1); @@ -326,7 +326,7 @@ FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params, } // FunctionType::get - The factory function for the FunctionType class. -FunctionType *FunctionType::get(const Type *ReturnType, +FunctionType *FunctionType::get(Type *ReturnType, ArrayRef<Type*> Params, bool isVarArg) { // TODO: This is brutally slow. std::vector<Type*> Key; @@ -351,21 +351,21 @@ FunctionType *FunctionType::get(const Type *ReturnType, } -FunctionType *FunctionType::get(const Type *Result, bool isVarArg) { +FunctionType *FunctionType::get(Type *Result, bool isVarArg) { return get(Result, ArrayRef<Type *>(), isVarArg); } /// isValidReturnType - Return true if the specified type is valid as a return /// type. -bool FunctionType::isValidReturnType(const Type *RetTy) { +bool FunctionType::isValidReturnType(Type *RetTy) { return !RetTy->isFunctionTy() && !RetTy->isLabelTy() && !RetTy->isMetadataTy(); } /// isValidArgumentType - Return true if the specified type is valid as an /// argument type. -bool FunctionType::isValidArgumentType(const Type *ArgTy) { +bool FunctionType::isValidArgumentType(Type *ArgTy) { return ArgTy->isFirstClassType(); } @@ -392,7 +392,7 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes, // Value not found. Create a new type! ST = new (Context.pImpl->TypeAllocator) StructType(Context); - ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct. + ST->setSubclassData(SCDB_IsLiteral); // Literal struct. ST->setBody(ETypes, isPacked); return ST; } @@ -412,13 +412,6 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) { NumContainedTys = Elements.size(); } -StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) { - StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context); - if (!Name.empty()) - ST->setName(Name); - return ST; -} - void StructType::setName(StringRef Name) { if (Name == getName()) return; @@ -461,6 +454,13 @@ void StructType::setName(StringRef Name) { //===----------------------------------------------------------------------===// // StructType Helper functions. +StructType *StructType::create(LLVMContext &Context, StringRef Name) { + StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context); + if (!Name.empty()) + ST->setName(Name); + return ST; +} + StructType *StructType::get(LLVMContext &Context, bool isPacked) { return get(Context, llvm::ArrayRef<Type*>(), isPacked); } @@ -478,21 +478,36 @@ StructType *StructType::get(Type *type, ...) 
{ return llvm::StructType::get(Ctx, StructFields); } -StructType *StructType::createNamed(LLVMContext &Context, StringRef Name, - ArrayRef<Type*> Elements, bool isPacked) { - StructType *ST = createNamed(Context, Name); +StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements, + StringRef Name, bool isPacked) { + StructType *ST = create(Context, Name); ST->setBody(Elements, isPacked); return ST; } -StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements, - bool isPacked) { +StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements) { + return create(Context, Elements, StringRef()); +} + +StructType *StructType::create(LLVMContext &Context) { + return create(Context, StringRef()); +} + + +StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name, + bool isPacked) { + assert(!Elements.empty() && + "This method may not be invoked with an empty list"); + return create(Elements[0]->getContext(), Elements, Name, isPacked); +} + +StructType *StructType::create(ArrayRef<Type*> Elements) { assert(!Elements.empty() && "This method may not be invoked with an empty list"); - return createNamed(Elements[0]->getContext(), Name, Elements, isPacked); + return create(Elements[0]->getContext(), Elements, StringRef()); } -StructType *StructType::createNamed(StringRef Name, Type *type, ...) { +StructType *StructType::create(StringRef Name, Type *type, ...) { assert(type != 0 && "Cannot create a struct type with no elements with this"); LLVMContext &Ctx = type->getContext(); va_list ap; @@ -502,11 +517,12 @@ StructType *StructType::createNamed(StringRef Name, Type *type, ...) { StructFields.push_back(type); type = va_arg(ap, llvm::Type*); } - return llvm::StructType::createNamed(Ctx, Name, StructFields); + return llvm::StructType::create(Ctx, StructFields, Name); } + StringRef StructType::getName() const { - assert(!isAnonymous() && "Anonymous structs never have names"); + assert(!isLiteral() && "Literal structs never have names"); if (SymbolTableEntry == 0) return StringRef(); return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey(); @@ -524,14 +540,14 @@ void StructType::setBody(Type *type, ...) { setBody(StructFields); } -bool StructType::isValidElementType(const Type *ElemTy) { +bool StructType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } /// isLayoutIdentical - Return true if this is layout identical to the /// specified struct. 
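The StructType factory API is reshuffled here: createNamed becomes create, "anonymous" structs are renamed "literal" structs, and the element list moves ahead of the name. A hedged usage sketch of the new spellings; the calls themselves appear in this patch, but the header paths are an era assumption:

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include <vector>

using namespace llvm;

int main() {
  LLVMContext Ctx;

  // Named (identified) struct: create it empty, fill in the body later, which
  // also allows recursive types.
  StructType *Node = StructType::create(Ctx, "node");
  std::vector<Type*> Fields;
  Fields.push_back(Type::getInt32Ty(Ctx));
  Fields.push_back(Node->getPointerTo());        // node* next
  Node->setBody(Fields, /*isPacked=*/false);

  // Literal struct (what used to be called anonymous): uniqued by contents.
  StructType *Pair = StructType::get(Ctx, Fields, /*isPacked=*/false);
  (void)Pair;
  return 0;
}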
-bool StructType::isLayoutIdentical(const StructType *Other) const { +bool StructType::isLayoutIdentical(StructType *Other) const { if (this == Other) return true; if (isPacked() != Other->isPacked() || @@ -557,8 +573,8 @@ StructType *Module::getTypeByName(StringRef Name) const { // CompositeType Implementation //===----------------------------------------------------------------------===// -Type *CompositeType::getTypeAtIndex(const Value *V) const { - if (const StructType *STy = dyn_cast<StructType>(this)) { +Type *CompositeType::getTypeAtIndex(const Value *V) { + if (StructType *STy = dyn_cast<StructType>(this)) { unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); assert(indexValid(Idx) && "Invalid structure index!"); return STy->getElementType(Idx); @@ -566,8 +582,8 @@ Type *CompositeType::getTypeAtIndex(const Value *V) const { return cast<SequentialType>(this)->getElementType(); } -Type *CompositeType::getTypeAtIndex(unsigned Idx) const { - if (const StructType *STy = dyn_cast<StructType>(this)) { +Type *CompositeType::getTypeAtIndex(unsigned Idx) { + if (StructType *STy = dyn_cast<StructType>(this)) { assert(indexValid(Idx) && "Invalid structure index!"); return STy->getElementType(Idx); } @@ -605,7 +621,7 @@ ArrayType::ArrayType(Type *ElType, uint64_t NumEl) } -ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) { +ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { Type *ElementType = const_cast<Type*>(elementType); assert(isValidElementType(ElementType) && "Invalid type for array element!"); @@ -618,7 +634,7 @@ ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) { return Entry; } -bool ArrayType::isValidElementType(const Type *ElemTy) { +bool ArrayType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } @@ -632,7 +648,7 @@ VectorType::VectorType(Type *ElType, unsigned NumEl) NumElements = NumEl; } -VectorType *VectorType::get(const Type *elementType, unsigned NumElements) { +VectorType *VectorType::get(Type *elementType, unsigned NumElements) { Type *ElementType = const_cast<Type*>(elementType); assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); assert(isValidElementType(ElementType) && @@ -647,7 +663,7 @@ VectorType *VectorType::get(const Type *elementType, unsigned NumElements) { return Entry; } -bool VectorType::isValidElementType(const Type *ElemTy) { +bool VectorType::isValidElementType(Type *ElemTy) { return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy(); } @@ -655,8 +671,7 @@ bool VectorType::isValidElementType(const Type *ElemTy) { // PointerType Implementation //===----------------------------------------------------------------------===// -PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) { - Type *EltTy = const_cast<Type*>(eltTy); +PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) { assert(EltTy && "Can't get a pointer to <null> type!"); assert(isValidElementType(EltTy) && "Invalid type for pointer element!"); @@ -677,11 +692,11 @@ PointerType::PointerType(Type *E, unsigned AddrSpace) setSubclassData(AddrSpace); } -PointerType *Type::getPointerTo(unsigned addrs) const { +PointerType *Type::getPointerTo(unsigned addrs) { return PointerType::get(this, addrs); } -bool PointerType::isValidElementType(const Type *ElemTy) { +bool PointerType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && 
!ElemTy->isMetadataTy(); } diff --git a/contrib/llvm/lib/VMCore/Value.cpp b/contrib/llvm/lib/VMCore/Value.cpp index f1815e3..2fa5f08 100644 --- a/contrib/llvm/lib/VMCore/Value.cpp +++ b/contrib/llvm/lib/VMCore/Value.cpp @@ -35,12 +35,12 @@ using namespace llvm; // Value Class //===----------------------------------------------------------------------===// -static inline Type *checkType(const Type *Ty) { +static inline Type *checkType(Type *Ty) { assert(Ty && "Value defined with a null type: Error!"); return const_cast<Type*>(Ty); } -Value::Value(const Type *ty, unsigned scid) +Value::Value(Type *ty, unsigned scid) : SubclassID(scid), HasValueHandle(0), SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)), UseList(0), Name(0) { @@ -369,7 +369,7 @@ bool Value::isDereferenceablePointer() const { for (User::const_op_iterator I = GEP->op_begin()+1, E = GEP->op_end(); I != E; ++I) { Value *Index = *I; - const Type *Ty = *GTI++; + Type *Ty = *GTI++; // Struct indices can't be out of bounds. if (isa<StructType>(Ty)) continue; @@ -380,7 +380,7 @@ bool Value::isDereferenceablePointer() const { if (CI->isZero()) continue; // Check to see that it's within the bounds of an array. - const ArrayType *ATy = dyn_cast<ArrayType>(Ty); + ArrayType *ATy = dyn_cast<ArrayType>(Ty); if (!ATy) return false; if (CI->getValue().getActiveBits() > 64) diff --git a/contrib/llvm/lib/VMCore/ValueTypes.cpp b/contrib/llvm/lib/VMCore/ValueTypes.cpp index 21a1f03..e13bd7d 100644 --- a/contrib/llvm/lib/VMCore/ValueTypes.cpp +++ b/contrib/llvm/lib/VMCore/ValueTypes.cpp @@ -19,6 +19,12 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +EVT EVT::changeExtendedVectorElementTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits()); + return getVectorVT(Context, IntTy, getVectorNumElements()); +} + EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) { EVT VT; VT.LLVMTy = IntegerType::get(Context, BitWidth); @@ -77,9 +83,9 @@ unsigned EVT::getExtendedVectorNumElements() const { unsigned EVT::getExtendedSizeInBits() const { assert(isExtended() && "Type is not extended!"); - if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) + if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) return ITy->getBitWidth(); - if (const VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) + if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) return VTy->getBitWidth(); assert(false && "Unrecognized extended type!"); return 0; // Suppress warnings. @@ -140,7 +146,7 @@ std::string EVT::getEVTString() const { /// getTypeForEVT - This method returns an LLVM type corresponding to the /// specified EVT. For integer types, this returns an unsigned type. Note /// that this will abort for types that cannot be represented. -const Type *EVT::getTypeForEVT(LLVMContext &Context) const { +Type *EVT::getTypeForEVT(LLVMContext &Context) const { switch (V.SimpleTy) { default: assert(isExtended() && "Type is not extended!"); @@ -186,7 +192,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const { /// getEVT - Return the value type corresponding to the specified type. This /// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types /// are returned as Other, otherwise they are invalid. 
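isDereferenceablePointer walks GEP indices with the same logic as before the const changes: struct indices are always in range, a constant array index must stay inside the array, and anything too wide to reason about is rejected. A stripped-down model of the per-index array test, with plain integers in place of ConstantInt and ArrayType (the exact widths-and-bounds policy is simplified here):

#include <cassert>
#include <stdint.h>

// One GEP step into an array of ArrayLen elements. Returns true if the
// constant index is provably in bounds: zero never leaves the object, and
// otherwise the index must be non-negative and smaller than the array length.
static bool arrayIndexInBounds(int64_t Index, uint64_t ArrayLen) {
  if (Index == 0) return true;
  if (Index < 0)  return false;
  return (uint64_t)Index < ArrayLen;
}

int main() {
  assert(arrayIndexInBounds(0, 4));
  assert(arrayIndexInBounds(3, 4));
  assert(!arrayIndexInBounds(4, 4));
  assert(!arrayIndexInBounds(-1, 4));
  return 0;
}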
-EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){ +EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: if (HandleUnknown) return MVT(MVT::Other); @@ -204,7 +210,7 @@ EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); case Type::PointerTyID: return MVT(MVT::iPTR); case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); + VectorType *VTy = cast<VectorType>(Ty); return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), VTy->getNumElements()); } diff --git a/contrib/llvm/lib/VMCore/Verifier.cpp b/contrib/llvm/lib/VMCore/Verifier.cpp index b146b89..9564b7d 100644 --- a/contrib/llvm/lib/VMCore/Verifier.cpp +++ b/contrib/llvm/lib/VMCore/Verifier.cpp @@ -35,6 +35,12 @@ // * It is illegal to have a ret instruction that returns a value that does not // agree with the function return value type. // * Function call argument types match the function prototype +// * A landing pad is defined by a landingpad instruction, and can be jumped to +// only by the unwind edge of an invoke instruction. +// * A landingpad instruction must be the first non-PHI instruction in the +// block. +// * All landingpad instructions must use the same personality function with +// the same function. // * All other things that are tested by asserts spread about the code... // //===----------------------------------------------------------------------===// @@ -131,18 +137,22 @@ namespace { /// already. SmallPtrSet<MDNode *, 32> MDNodes; + /// PersonalityFn - The personality function referenced by the + /// LandingPadInsts. All LandingPadInsts within the same function must use + /// the same personality function. + const Value *PersonalityFn; + Verifier() - : FunctionPass(ID), - Broken(false), RealPass(true), action(AbortProcessAction), - Mod(0), Context(0), DT(0), MessagesStr(Messages) { - initializeVerifierPass(*PassRegistry::getPassRegistry()); - } + : FunctionPass(ID), Broken(false), RealPass(true), + action(AbortProcessAction), Mod(0), Context(0), DT(0), + MessagesStr(Messages), PersonalityFn(0) { + initializeVerifierPass(*PassRegistry::getPassRegistry()); + } explicit Verifier(VerifierFailureAction ctn) - : FunctionPass(ID), - Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0), - MessagesStr(Messages) { - initializeVerifierPass(*PassRegistry::getPassRegistry()); - } + : FunctionPass(ID), Broken(false), RealPass(true), action(ctn), Mod(0), + Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) { + initializeVerifierPass(*PassRegistry::getPassRegistry()); + } bool doInitialization(Module &M) { Mod = &M; @@ -165,6 +175,7 @@ namespace { visit(F); InstsInThisBlock.clear(); + PersonalityFn = 0; // If this is a real pass, in a pass manager, we must abort before // returning back to the pass manager, or else the pass manager may try to @@ -278,18 +289,22 @@ namespace { void visitUserOp1(Instruction &I); void visitUserOp2(Instruction &I) { visitUserOp1(I); } void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI); + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI); + void visitAtomicRMWInst(AtomicRMWInst &RMWI); + void visitFenceInst(FenceInst &FI); void visitAllocaInst(AllocaInst &AI); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitLandingPadInst(LandingPadInst &LPI); void VerifyCallSite(CallSite CS); - bool PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, + bool PerformTypeCheck(Intrinsic::ID 
ID, Function *F, Type *Ty, int VT, unsigned ArgNo, std::string &Suffix); void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, unsigned RetNum, unsigned ParamNum, ...); - void VerifyParameterAttrs(Attributes Attrs, const Type *Ty, + void VerifyParameterAttrs(Attributes Attrs, Type *Ty, bool isReturnValue, const Value *V); - void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs, + void VerifyFunctionAttrs(FunctionType *FT, const AttrListPtr &Attrs, const Value *V); void WriteValue(const Value *V) { @@ -302,7 +317,7 @@ namespace { } } - void WriteType(const Type *T) { + void WriteType(Type *T) { if (!T) return; MessagesStr << ' ' << *T; } @@ -323,7 +338,7 @@ namespace { } void CheckFailed(const Twine &Message, const Value *V1, - const Type *T2, const Value *V3 = 0) { + Type *T2, const Value *V3 = 0) { MessagesStr << Message.str() << "\n"; WriteValue(V1); WriteType(T2); @@ -331,8 +346,8 @@ namespace { Broken = true; } - void CheckFailed(const Twine &Message, const Type *T1, - const Type *T2 = 0, const Type *T3 = 0) { + void CheckFailed(const Twine &Message, Type *T1, + Type *T2 = 0, Type *T3 = 0) { MessagesStr << Message.str() << "\n"; WriteType(T1); WriteType(T2); @@ -421,9 +436,9 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { "invalid linkage for intrinsic global variable", &GV); // Don't worry about emitting an error for it not being an array, // visitGlobalValue will complain on appending non-array. - if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) { - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - const PointerType *FuncPtrTy = + if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) { + StructType *STy = dyn_cast<StructType>(ATy->getElementType()); + PointerType *FuncPtrTy = FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo(); Assert1(STy && STy->getNumElements() == 2 && STy->getTypeAtIndex(0u)->isIntegerTy(32) && @@ -514,7 +529,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. -void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty, +void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty, bool isReturnValue, const Value *V) { if (Attrs == Attribute::None) return; @@ -541,7 +556,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty, Attribute::getAsString(TypeI), V); Attributes ByValI = Attrs & Attribute::ByVal; - if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) { Assert1(!ByValI || PTy->getElementType()->isSized(), "Attribute " + Attribute::getAsString(ByValI) + " does not support unsized types!", V); @@ -554,7 +569,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty, // VerifyFunctionAttrs - Check parameter attributes against a function type. // The value V is printed in error messages. 
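The new landing-pad rules listed in the Verifier header comment reduce to simple bookkeeping around the PersonalityFn member introduced above: remember the personality function of the first landingpad seen in a function, require every later landingpad to match, and reset between functions. A minimal model of that check, detached from the real Verifier:

#include <cassert>
#include <vector>

// Stand-in for "the personality operand of a landingpad"; any stable id works.
typedef const void *Personality;

// Returns true if every landingpad in one function uses the same personality
// function, mirroring how the Verifier tracks PersonalityFn per function and
// clears it before visiting the next one.
static bool personalitiesConsistent(const std::vector<Personality> &Pads) {
  Personality Seen = 0;
  for (size_t i = 0, e = Pads.size(); i != e; ++i) {
    if (!Seen)
      Seen = Pads[i];            // first landingpad fixes the personality
    else if (Pads[i] != Seen)
      return false;              // later landingpad disagrees: verifier error
  }
  return true;
}

int main() {
  int gxx = 0, sj = 0;           // two distinct "personality functions"
  std::vector<Personality> Same;     Same.push_back(&gxx); Same.push_back(&gxx);
  std::vector<Personality> Mixed;    Mixed.push_back(&gxx); Mixed.push_back(&sj);
  assert(personalitiesConsistent(Same));
  assert(!personalitiesConsistent(Mixed));
  return 0;
}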
-void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
+void Verifier::VerifyFunctionAttrs(FunctionType *FT,
                                    const AttrListPtr &Attrs,
                                    const Value *V) {
   if (Attrs.isEmpty())
@@ -565,7 +580,7 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
 
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
     const AttributeWithIndex &Attr = Attrs.getSlot(i);
-    const Type *Ty;
+    Type *Ty;
     if (Attr.Index == 0)
       Ty = FT->getReturnType();
     else if (Attr.Index-1 < FT->getNumParams())
@@ -615,7 +630,7 @@ static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
 //
 void Verifier::visitFunction(Function &F) {
   // Check function arguments.
-  const FunctionType *FT = F.getFunctionType();
+  FunctionType *FT = F.getFunctionType();
   unsigned NumArgs = F.arg_size();
 
   Assert1(Context == &F.getContext(),
@@ -795,7 +810,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
 void Verifier::visitSwitchInst(SwitchInst &SI) {
   // Check to make sure that all of the constants in the switch instruction
   // have the same type as the switched-on value.
-  const Type *SwitchTy = SI.getCondition()->getType();
+  Type *SwitchTy = SI.getCondition()->getType();
   SmallPtrSet<ConstantInt*, 32> Constants;
   for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
     Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
@@ -836,8 +851,8 @@ void Verifier::visitUserOp1(Instruction &I) {
 
 void Verifier::visitTruncInst(TruncInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -854,8 +869,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
 
 void Verifier::visitZExtInst(ZExtInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
   Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
@@ -872,8 +887,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
 
 void Verifier::visitSExtInst(SExtInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -890,8 +905,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
 
 void Verifier::visitFPTruncInst(FPTruncInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
   // Get the size of the types in bits, we'll need this later
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
@@ -907,8 +922,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
 
 void Verifier::visitFPExtInst(FPExtInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -925,8 +940,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
 
 void Verifier::visitUIToFPInst(UIToFPInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
@@ -948,8 +963,8 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
 
 void Verifier::visitSIToFPInst(SIToFPInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
@@ -971,8 +986,8 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
 
 void Verifier::visitFPToUIInst(FPToUIInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
@@ -994,8 +1009,8 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
 
 void Verifier::visitFPToSIInst(FPToSIInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
@@ -1017,8 +1032,8 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
 
 void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
   Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
@@ -1028,8 +1043,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
 
 void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
   Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
@@ -1039,8 +1054,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
 
 void Verifier::visitBitCastInst(BitCastInst &I) {
   // Get the source and destination types
-  const Type *SrcTy = I.getOperand(0)->getType();
-  const Type *DestTy = I.getType();
+  Type *SrcTy = I.getOperand(0)->getType();
+  Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
   unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
@@ -1090,11 +1105,11 @@ void Verifier::VerifyCallSite(CallSite CS) {
 
   Assert1(CS.getCalledValue()->getType()->isPointerTy(),
           "Called function must be a pointer!", I);
-  const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+  PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
   Assert1(FPTy->getElementType()->isFunctionTy(),
           "Called function is not pointer to function type!", I);
 
-  const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+  FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
 
   // Verify that the correct number of arguments are being passed
   if (FTy->isVarArg())
@@ -1152,6 +1167,12 @@ void Verifier::visitCallInst(CallInst &CI) {
 
 void Verifier::visitInvokeInst(InvokeInst &II) {
   VerifyCallSite(&II);
+
+  // Verify that there is a landingpad instruction as the first non-PHI
+  // instruction of the 'unwind' destination.
+  Assert1(II.getUnwindDest()->isLandingPad(),
+          "The unwind destination does not have a landingpad instruction!",&II);
+
   visitTerminatorInst(II);
 }
 
@@ -1219,8 +1240,8 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
 
 void Verifier::visitICmpInst(ICmpInst &IC) {
   // Check that the operands are the same type
-  const Type *Op0Ty = IC.getOperand(0)->getType();
-  const Type *Op1Ty = IC.getOperand(1)->getType();
+  Type *Op0Ty = IC.getOperand(0)->getType();
+  Type *Op1Ty = IC.getOperand(1)->getType();
   Assert1(Op0Ty == Op1Ty,
           "Both operands to ICmp instruction are not of the same type!", &IC);
   // Check that the operands are the right type
@@ -1236,8 +1257,8 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
 
 void Verifier::visitFCmpInst(FCmpInst &FC) {
   // Check that the operands are the same type
-  const Type *Op0Ty = FC.getOperand(0)->getType();
-  const Type *Op1Ty = FC.getOperand(1)->getType();
+  Type *Op0Ty = FC.getOperand(0)->getType();
+  Type *Op1Ty = FC.getOperand(1)->getType();
   Assert1(Op0Ty == Op1Ty,
           "Both operands to FCmp instruction are not of the same type!", &FC);
   // Check that the operands are the right type
@@ -1274,10 +1295,13 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
 }
 
 void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+  Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
+          ->getElementType()->isSized(),
+          "GEP into unsized type!", &GEP);
+
   SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
-  const Type *ElTy =
-    GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
-                                      Idxs.begin(), Idxs.end());
+  Type *ElTy =
+    GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
   Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
   Assert2(GEP.getType()->isPointerTy() &&
           cast<PointerType>(GEP.getType())->getElementType() == ElTy,
@@ -1286,26 +1310,44 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 }
 
 void Verifier::visitLoadInst(LoadInst &LI) {
-  const PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+  PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
   Assert1(PTy, "Load operand must be a pointer.", &LI);
-  const Type *ElTy = PTy->getElementType();
+  Type *ElTy = PTy->getElementType();
   Assert2(ElTy == LI.getType(),
           "Load result type does not match pointer operand type!", &LI, ElTy);
+  if (LI.isAtomic()) {
+    Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+            "Load cannot have Release ordering", &LI);
+    Assert1(LI.getAlignment() != 0,
+            "Atomic load must specify explicit alignment", &LI);
+  } else {
+    Assert1(LI.getSynchScope() == CrossThread,
+            "Non-atomic load cannot have SynchronizationScope specified", &LI);
+  }
   visitInstruction(LI);
 }
 
 void Verifier::visitStoreInst(StoreInst &SI) {
-  const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+  PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
   Assert1(PTy, "Store operand must be a pointer.", &SI);
-  const Type *ElTy = PTy->getElementType();
+  Type *ElTy = PTy->getElementType();
   Assert2(ElTy == SI.getOperand(0)->getType(),
           "Stored value type does not match pointer operand type!", &SI, ElTy);
+  if (SI.isAtomic()) {
+    Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+            "Store cannot have Acquire ordering", &SI);
+    Assert1(SI.getAlignment() != 0,
+            "Atomic store must specify explicit alignment", &SI);
+  } else {
+    Assert1(SI.getSynchScope() == CrossThread,
+            "Non-atomic store cannot have SynchronizationScope specified", &SI);
+  }
   visitInstruction(SI);
 }
 
 void Verifier::visitAllocaInst(AllocaInst &AI) {
-  const PointerType *PTy = AI.getType();
+  PointerType *PTy = AI.getType();
   Assert1(PTy->getAddressSpace() == 0,
           "Allocation instruction pointer not in the generic address space!",
           &AI);
@@ -1316,6 +1358,49 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
   visitInstruction(AI);
 }
 
+void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
+  Assert1(CXI.getOrdering() != NotAtomic,
+          "cmpxchg instructions must be atomic.", &CXI);
+  Assert1(CXI.getOrdering() != Unordered,
+          "cmpxchg instructions cannot be unordered.", &CXI);
+  PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
+  Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+  Type *ElTy = PTy->getElementType();
+  Assert2(ElTy == CXI.getOperand(1)->getType(),
+          "Expected value type does not match pointer operand type!",
+          &CXI, ElTy);
+  Assert2(ElTy == CXI.getOperand(2)->getType(),
+          "Stored value type does not match pointer operand type!",
+          &CXI, ElTy);
+  visitInstruction(CXI);
+}
+
+void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+  Assert1(RMWI.getOrdering() != NotAtomic,
+          "atomicrmw instructions must be atomic.", &RMWI);
+  Assert1(RMWI.getOrdering() != Unordered,
+          "atomicrmw instructions cannot be unordered.", &RMWI);
+  PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
+  Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+  Type *ElTy = PTy->getElementType();
+  Assert2(ElTy == RMWI.getOperand(1)->getType(),
+          "Argument value type does not match pointer operand type!",
+          &RMWI, ElTy);
+  Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+          RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+          "Invalid binary operation!", &RMWI);
+  visitInstruction(RMWI);
+}
+
+void Verifier::visitFenceInst(FenceInst &FI) {
+  const AtomicOrdering Ordering = FI.getOrdering();
+  Assert1(Ordering == Acquire || Ordering == Release ||
+          Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+          "fence instructions may only have "
+          "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+  visitInstruction(FI);
+}
+
 void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
   Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
                                            EVI.getIndices()) ==
@@ -1334,6 +1419,55 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
   visitInstruction(IVI);
 }
 
+void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
+  BasicBlock *BB = LPI.getParent();
+
+  // The landingpad instruction is ill-formed if it doesn't have any clauses and
+  // isn't a cleanup.
+  Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+          "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+
+  // The landingpad instruction defines its parent as a landing pad block. The
+  // landing pad block may be branched to only by the unwind edge of an invoke.
+  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+    const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
+    Assert1(II && II->getUnwindDest() == BB,
+            "Block containing LandingPadInst must be jumped to "
+            "only by the unwind edge of an invoke.", &LPI);
+  }
+
+  // The landingpad instruction must be the first non-PHI instruction in the
+  // block.
+  Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
+          "LandingPadInst not the first non-PHI instruction in the block.",
+          &LPI);
+
+  // The personality functions for all landingpad instructions within the same
+  // function should match.
+  if (PersonalityFn)
+    Assert1(LPI.getPersonalityFn() == PersonalityFn,
+            "Personality function doesn't match others in function", &LPI);
+  PersonalityFn = LPI.getPersonalityFn();
+
+  // All operands must be constants.
+  Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+          &LPI);
+  for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
+    Value *Clause = LPI.getClause(i);
+    Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
+    if (LPI.isCatch(i)) {
+      Assert1(isa<PointerType>(Clause->getType()),
+              "Catch operand does not have pointer type!", &LPI);
+    } else {
+      Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+      Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+              "Filter operand is not an array of constants!", &LPI);
+    }
+  }
+
+  visitInstruction(LPI);
+}
+
 /// verifyInstruction - Verify that an instruction is well formed.
 ///
 void Verifier::visitInstruction(Instruction &I) {
@@ -1588,20 +1722,20 @@ static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
   return "Intrinsic result type #" + utostr(ArgNo);
 }
 
-bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
                                 int VT, unsigned ArgNo, std::string &Suffix) {
-  const FunctionType *FTy = F->getFunctionType();
+  FunctionType *FTy = F->getFunctionType();
 
   unsigned NumElts = 0;
-  const Type *EltTy = Ty;
-  const VectorType *VTy = dyn_cast<VectorType>(Ty);
+  Type *EltTy = Ty;
+  VectorType *VTy = dyn_cast<VectorType>(Ty);
   if (VTy) {
     EltTy = VTy->getElementType();
     NumElts = VTy->getNumElements();
   }
 
-  const Type *RetTy = FTy->getReturnType();
-  const StructType *ST = dyn_cast<StructType>(RetTy);
+  Type *RetTy = FTy->getReturnType();
+  StructType *ST = dyn_cast<StructType>(RetTy);
   unsigned NumRetVals;
   if (RetTy->isVoidTy())
     NumRetVals = 0;
@@ -1618,7 +1752,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
   // type.
   if ((Match & (ExtendedElementVectorType |
                 TruncatedElementVectorType)) != 0) {
-    const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
+    IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
     if (!VTy || !IEltTy) {
       CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
                   "an integral vector type.", F);
@@ -1709,7 +1843,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
     // Outside of TableGen, we don't distinguish iPTRAny (to any address space)
     // and iPTR. In the verifier, we can not distinguish which case we have so
    // allow either case to be legal.
-    if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+    if (PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
      EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
      if (PointeeVT == MVT::Other) {
        CheckFailed("Intrinsic has pointer to complex type.");
@@ -1757,7 +1891,7 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
                                         unsigned NumParams, ...) {
   va_list VA;
   va_start(VA, NumParams);
-  const FunctionType *FTy = F->getFunctionType();
+  FunctionType *FTy = F->getFunctionType();
 
   // For overloaded intrinsics, the Suffix of the function name must match the
   // types of the arguments. This variable keeps track of the expected
@@ -1769,8 +1903,8 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
     return;
   }
 
-  const Type *Ty = FTy->getReturnType();
-  const StructType *ST = dyn_cast<StructType>(Ty);
+  Type *Ty = FTy->getReturnType();
+  StructType *ST = dyn_cast<StructType>(Ty);
 
   if (NumRetVals == 0 && !Ty->isVoidTy()) {
     CheckFailed("Intrinsic should return void", F);
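[Editorial note, not part of the patch] As a rough sketch of how the new atomic-instruction checks in Verifier.cpp above can be exercised, the following C++ snippet builds a tiny function containing an atomic load and a fence, then runs the verifier on it. It assumes the LLVM 3.0-era layout and API that this import appears to target (IRBuilder in llvm/Support/IRBuilder.h, verifyFunction in llvm/Analysis/Verifier.h, LoadInst::setAtomic/setAlignment); header paths and signatures should be confirmed against the tree this diff applies to.

// Sketch only: exercise the verifier's new atomic checks (assumed 3.0-era API).
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Analysis/Verifier.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("atomics", Ctx);
  Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                                 Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  Value *P = B.CreateAlloca(Type::getInt32Ty(Ctx));
  LoadInst *LI = B.CreateLoad(P);
  LI->setAtomic(SequentiallyConsistent); // atomic loads may not be Release/AcquireRelease
  LI->setAlignment(4);                   // atomic load/store now require explicit alignment
  B.CreateFence(SequentiallyConsistent); // fences: acquire, release, acq_rel, or seq_cst only
  B.CreateRetVoid();

  // PrintMessageAction reports verifier complaints on stderr instead of aborting.
  verifyFunction(*F, PrintMessageAction);
  return 0;
}

Dropping the setAlignment(4) call, or giving the load Release ordering, should trip the corresponding "Atomic load must specify explicit alignment" and "Load cannot have Release ordering" assertions added by this hunk.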